Below is the code I am using to generate the crunched numbers for Albany County, for example my 2021 Albany County Election Spreadsheet

Below is the code I am using to generate the crunched numbers for Albany County, for example my 2021 Albany County Election Spreadsheet. Text is converted using pdftotext -layout 2021GeneralRecanvass.txt which converts the PDF to a structured text file for easier parsing. The Excel Writer library is used for extensive formatting of the file for easy reading and parsing. View this as a separate page for syntax highlighting.

from io import StringIO
import re
import pandas as pd
import numpy as np

# paths
# electionResultText = '/home/andy/Desktop/2021GeneralRecanvass.txt'
# enrollmentXLS = '/home/andy/enroll/nov21-Enrollment/AlbanyED_nov21.xlsx' 
# outputPath = '/tmp/2021_albany_county_races.xlsx'

# electionResultText = '/home/andy/Documents/GIS.Data/election.districts/2020GeneralRecanvass.txt'
# enrollmentXLS = '/home/andy/enroll/nov20-Enrollment/AlbanyED_nov20.xlsx' 
# outputPath = '/tmp/2020_albany_county_races.xlsx'

electionResultText = '/home/andy/Documents/GIS.Data/election.districts/2019'
enrollmentXLS = '/home/andy/enroll/nov19-Enrollment/AlbanyED_nov19.xlsx' 
outputPath = '/tmp/2018_albany_county_races.xlsx'

# number to excel column letter
def letter(colNum):
    import math
    
    if colNum > 26:
        return chr(ord('@')+math.floor(colNum/26))+chr(ord('@')+colNum%26)
    else:
        return chr(ord('@')+colNum)

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None) # show everything when previewing

with open(electionResultText) as f:
    data = f.read()

enroll = pd.read_excel(enrollmentXLS, header=4)
enroll = enroll[(enroll['STATUS']=='Active')][['ELECTION DIST', 'TOTAL']].dropna()
enroll['ELECTION DIST'] = enroll['ELECTION DIST'].astype(str).str.replace('  ',' ')
enroll['ELECTION DIST']=enroll['ELECTION DIST'].str.strip()
enroll['Enrollment'] = enroll['TOTAL']
enroll.drop(labels=['TOTAL'], axis=1, inplace=True)

# array containing election result dataframes
er = {}
    
# split each race which is divided by ten or more equal signs
for raceData in re.split('={10,}',data):

    # blank out old data frame
    df = None

    # split lines
    rows = raceData.split('\n')

    race = ""
    candidates = {}
    startAt = 0
    
    for i, line in enumerate(rows):
        # find race name
        if re.search('VOTES\s*?PERCENT', line):
            race = rows[i+1]
        
        race = race.replace('  ',' ')
        race = race.rstrip()
        #print(race)
        
        # find candidate names
        for result in re.findall('(\d\d)\s*?=\s*?(\w.*?)\d', line):
            candidates[int(result[0])] = result[1].rstrip()
       
        # find start at location for CSV reader
        if re.findall('-{5,}', line):
            startAt = i+3
            break
    
    # skips enrollment stats, as the don't have candidates
    if not candidates or not 2 in candidates:
        continue
    
    if not race:
        continue
        
    #print(race)
    
    df = pd.read_csv(
        StringIO(raceData),
        header=None,
        skiprows=startAt,
        sep='(?<=\d)\s{1,}(?=\d)',
        engine='python',
        on_bad_lines='warn')
    
    df=df[df[0].str.contains('^\d{4}').fillna(False)] # ONLY ROWS STARTING WITH 4-DIGIT ED CODE
    
    df.reset_index(drop=True, inplace=True) # reset index so we can use it in formulas

    df=df.rename(candidates,axis=1) # rename columns 
    
    df.iloc[:,1:]=df.iloc[:,1:].apply(pd.to_numeric) # make sure all columns are numeric 
    df.iloc[:,1:]=df.iloc[:,1:].convert_dtypes('int32') # cast columns to int32

    # crunching
    df.insert(1, 'Ballot', df.iloc[:,1:].convert_dtypes('int32').sum(axis=1)) # add total column
    
    df.insert(2, 'Blanks', df['OVER VOTES'].convert_dtypes('int32')) # add blank
    df.drop(labels=['OVER VOTES'], axis=1, inplace=True)

    df['Blanks']+=df['UNDER VOTES'].convert_dtypes('int32') # add under votes
    df.drop(labels=['UNDER VOTES'], axis=1, inplace=True)

    df.insert(3, 'Canvas', '=F'+(df.index+2).map(str)+'-G'+(df.index+2).map(str))  
    df.insert(4, 'TO %', '=(F'+(df.index+2).map(str)+'/E'+(df.index+2).map(str)+')')
    df.insert(5, 'DO %', '=(G'+(df.index+2).map(str)+'/F'+(df.index+2).map(str)+')')
        
    # create check columns
    df['CHECK'] = '=F'+(df.index+2).map(str)+'-G'+(df.index+2).map(str)
    df['CHECK %'] = '=0'
    
    # add percent columns
    for i, col in enumerate(df.columns[6:-2]): 
        try:
            # check columns
            if (len(df.columns)-i-2) > 0:
                # add to check column
                df['CHECK'] += '-'+letter(i+i+11)+(df.index+2).map(str)

                # add check percent
                df['CHECK %'] += '+'+letter(i+i+12)+(df.index+2).map(str)

            # add percent
            df.insert(i+i+7, col+' %', '=('+letter(i+i+11)+(df.index+2).map(str)+'/H'+(df.index+2).map(str)+')') 
        except:
            pass

    # add T W E Columns, drop combined field
    df[['ED Code','Municipality','String','Ward','ED']]=df[0].str.extract('(\d\d\d\d)\s*?(.*?)(\s*?WARD\s*(\d*))?\s*?ED\s*(\d*)')

    # temporary string used for data merge with enrollments
    df['ELECTION DIST'] = df['Municipality'].str.strip().replace('  ',' ') + \
    ' '+df['Ward'].fillna(0).astype(str).str.zfill(3)+df['ED'].str.zfill(3).astype('str')

    # merge on ED Key
    df=df.merge(enroll, on='ELECTION DIST', how='left')
    df=df.drop(0, axis=1)
    df=df.drop(labels=['ELECTION DIST','String'],axis=1)
    
    # move columns to proper order
    cols = list(df.columns)
    df = df[cols[-5:]+cols[:-5]]
    
    df['Municipality']=df['Municipality'].str.title()
    df['Municipality'] = df['Municipality'].str.strip().replace('  ',' ') # remove whitespace around muni column
    
    # array with election result dataframes
    er[race]=df.fillna(0)
    
# write file
ew = pd.ExcelWriter(outputPath)

for i, race in enumerate(er):
    raceStr = str(i+1)+' '+race
    if (len(race)>28):
        raceStr = str(i+1)+' '+race[:10] + '...' + race[-10:]
    
    # disble header, manually write, as pre-defined headers can't be formatted
    er[race].fillna(0).to_excel(ew,sheet_name=raceStr, index=False)
    
    headForm = ew.book.add_format({'text_wrap': 1, 'font_family': 'Arial', 'bold': True , 
                                   'valign': 'vcenter', 'align': 'center', 'bg_color': '#CCCCCC'})
    for colnum, value in enumerate(er[race].columns.values):
        ew.sheets[raceStr].write(0, colnum, value, headForm)
    
    ew.sheets[raceStr].set_row(0,50)
    
    # set column width for all columns to 20, freeze panes
    bodyForm = ew.book.add_format({'text_wrap': 1, 'font_family': 'Arial', 'num_format': '#,##0', 'valign': 'vcenter', 'align': 'right'})
    bodyPerForm = ew.book.add_format({'text_wrap': 1, 'font_family': 'Arial', 'num_format': '0.0%', 'valign': 'vcenter', 'align': 'right'})
    muniForm = ew.book.add_format({'text_wrap': 1, 'font_family': 'Arial', 'valign': 'vcenter', 'align': 'center'})
    ew.sheets[raceStr].set_column('A1:A9999', 6, bodyForm)
    ew.sheets[raceStr].set_column('B1:B9999', 15, muniForm)
    ew.sheets[raceStr].set_column('C1:D9999', 4, bodyForm)
    ew.sheets[raceStr].set_column('E1:H9999', 10, bodyForm)
    ew.sheets[raceStr].set_column('I1:J9999', 10, bodyPerForm)
    
    for colNum in range(11,40):
        if colNum%2:
            ew.sheets[raceStr].set_column(letter(colNum)+'1:'+letter(colNum)+'9999', 12, bodyForm)
        else:
            ew.sheets[raceStr].set_column(letter(colNum)+'1:'+letter(colNum)+'9999', 12, bodyPerForm)

    ew.sheets[raceStr].freeze_panes('E2')
        
ew.save()

Leave a Reply

Your email address will not be published. Required fields are marked *