"""Shuffle-based election simulation over per-machine vote counts.

Each record is a five-element list ``[center, table, machine, yes, no]``.
Within every voting center the individual ballots are pooled, shuffled and
re-dealt to the machines (each machine keeps its original ballot total), which
produces randomized data sets to compare against the real one.
"""

import csv
import glob
import random
import sys

# Kept for callers that import it from this module; the deck is now a list.
from array import array

# indexes into data arrays
CENTER = 0
TABLE = 1
MACHINE = 2
YES = 3
NO = 4


def _open_csv(filename, mode="r"):
    """Open *filename* the way the csv module expects on this Python version.

    Python 3 wants text mode with ``newline=""``; Python 2 wants binary mode.
    """
    if sys.version_info[0] >= 3:
        return open(filename, mode, newline="")
    return open(filename, mode + "b")


def read_csv_comprehensive(filename):
    """Read the original spreadsheet converted to CSV.

    Columns 3, 4 and 5 hold center, table and machine; column 6 holds the
    "no" count and column 8 the "yes" count.

    Returns a flat list of ``[center, table, machine, yes, no]`` records.
    Raises ValueError if any of those cells is not an integer.
    """
    data = []
    with _open_csv(filename) as handle:
        for row in csv.reader(handle):
            center = int(row[3])
            table = int(row[4])
            machine = int(row[5])
            no = int(row[6])
            yes = int(row[8])
            data.append([center, table, machine, yes, no])
    return data


def read_csv_simple(filename):
    """Read a data file in the simple five-column format.

    Returns a plain dict mapping each center id to the list of its
    ``[center, table, machine, yes, no]`` records, in file order.
    Raises ValueError if any of the five cells is not an integer.
    """
    data = {}
    with _open_csv(filename) as handle:
        for row in csv.reader(handle):
            record = [int(cell) for cell in row[:5]]
            # setdefault keeps the result a plain dict, so a lookup of an
            # unknown center still raises KeyError.
            data.setdefault(record[CENTER], []).append(record)
    return data


def write_csv_simple(filename, data):
    """Write *data* (a flat iterable of records) in the simple format."""
    with _open_csv(filename, "w") as handle:
        csv.writer(handle).writerows(data)


def create_center_set(data):
    """Return a list of the voting center ids (the keys of *data*)."""
    return list(data)


def records_in_center(data, center):
    """Return the data records associated with *center*.

    Raises KeyError if *center* is not present in *data*.
    """
    return data[center]


def yes_votes(data):
    """Return the number of yes votes in the records of *data*."""
    return sum(rec[YES] for rec in data)


def no_votes(data):
    """Return the number of no votes in the records of *data*."""
    return sum(rec[NO] for rec in data)


def machine_totals(data):
    """Return a list of the total votes (yes + no) on each machine in *data*."""
    return [rec[YES] + rec[NO] for rec in data]


def make_deck(data):
    """Return a deck with one 'Y' per yes vote followed by one 'N' per no vote.

    The deck is a list of one-character strings; it supports the slicing,
    ``count`` and in-place shuffling used by the simulation.
    """
    return ['Y'] * yes_votes(data) + ['N'] * no_votes(data)


def deal(deck, machines):
    """Return a list of the votes assigned to each voting machine.

    *machines* is a sequence of hand sizes; consecutive slices of *deck* of
    those sizes are handed out in order.
    """
    hands = []
    dealt = 0
    for m in machines:
        hands.append(deck[dealt:dealt + m])
        dealt += m
    return hands


def simulate_center(data, center):
    """Simulate one voting center.

    Pools the center's ballots, shuffles them with the ``random`` module and
    deals them back so each machine keeps its original ballot total.

    Returns a list of ``[center, table, machine, yes, no]`` records.
    """
    records = records_in_center(data, center)
    deck = make_deck(records)
    random.shuffle(deck)
    hands = deal(deck, machine_totals(records))
    return [
        [rec[CENTER], rec[TABLE], rec[MACHINE], hand.count('Y'), hand.count('N')]
        for rec, hand in zip(records, hands)
    ]


def simulate_election(data, centers):
    """Simulate an entire election using *data* and the list of center ids.

    Returns one flat list of records covering every center in *centers*.
    """
    results = []
    for center in centers:
        # extend() avoids rebuilding the whole list for every center.
        results.extend(simulate_center(data, center))
    return results


def run_simulations(n, source="votes-simple.csv", prefix="votes-random-"):
    """Run *n* full simulations.

    Reads *source* once, then for each ``i`` in ``range(n)`` prints ``i`` and
    writes the simulated election to ``prefix + str(i) + ".csv"``.
    """
    data = read_csv_simple(source)
    centers = create_center_set(data)
    for i in range(n):
        print(i)
        output = simulate_election(data, centers)
        write_csv_simple(prefix + str(i) + ".csv", output)


def dups(l):
    """Return True if *l* contains duplicate elements (elements must be hashable)."""
    return len(set(l)) != len(l)


def file_stats(filename):
    """Find the statistic we're interested in for one file.

    Returns the number of centers in which the highest per-machine yes count
    occurs on more than one machine.
    """
    data = read_csv_simple(filename)
    count = 0
    for center in create_center_set(data):
        yes = [rec[YES] for rec in records_in_center(data, center)]
        # replace depending on statistic; this computes cap-consistent precincts
        if yes.count(max(yes)) > 1:
            count += 1
    return count


def gather_stats(pattern="*.csv"):
    """Print and return the statistic for every file matching *pattern*.

    The default pattern covers all CSV files in the current directory.
    """
    stats = []
    for f in glob.glob(pattern):
        t = file_stats(f)
        print(t)
        stats.append(t)
    return stats