Python code that takes a list of addresses, runs them through the NYS Address Management system, then uses GeoPandas to look up the municipality and Assembly district from GeoPackages, and writes the results out to a CSV file.

Python, the State Geocoder and Outputting Assembly Districts

Python code that takes a list of addresses, runs them through the NYS Address Management system, then uses GeoPandas to look up the municipality and Assembly district from GeoPackages, and writes the results out to a CSV file. Nothing fancy, but it does the job. I could use it with any other shapefile I want. GeoPandas is nice because it’s fast and doesn’t require loading QGIS.

#!/usr/bin/python

import csv
import json
import math
import os
import sys

import geopandas as gpd
import pandas as pd
import requests

# NOTE(review): the statement that assigned the geocoder URL is absent from
# this listing even though the request used it; the endpoint below is believed
# to be the state geocoder's batch operation -- confirm it before relying on it.
GEOCODER_URL = (
    'https://gisservices.its.ny.gov/arcgis/rest/services/Locators/'
    'Street_and_Address_Composite/GeocodeServer/geocodeAddresses'
)

# boundary GeoPackages joined onto the geocoded points, in join order
BOUNDARY_LAYERS = (
    r'/home/andy/Documents/GIS.Data/geocode/cosub.gpkg',  # county municipality
    r'/home/andy/Documents/GIS.Data/geocode/ad.gpkg',  # ads
    r'/home/andy/Documents/GIS.Data/geocode/sd.gpkg',  # sd
    r'/home/andy/Documents/GIS.Data/geocode/cd.gpkg',  # cd
)


def read_addresses(csv_path):
    """Return the rows of the address CSV at *csv_path* as a list of dicts."""
    with open(csv_path, newline='') as csvfile:
        return list(csv.DictReader(csvfile))


def build_address_query(rows):
    """Return the JSON ``addresses`` payload for the geocoder request.

    Each row's ``Address`` value (trailing whitespace removed) becomes one
    record whose ``OBJECTID`` is the row's position in *rows*.  The payload is
    serialised with ``json.dumps`` so quotes and backslashes inside an address
    are escaped and no trailing comma is left after the last record.

    Raises:
        KeyError: a row has no ``Address`` column.
    """
    records = [
        {'attributes': {'OBJECTID': position, 'SINGLELINE': row['Address'].rstrip()}}
        for position, row in enumerate(rows)
    ]
    return json.dumps({'records': records})


def geocode(rows, url=GEOCODER_URL):
    """Send *rows* to the state geocoder and return its ``locations`` list.

    Raises:
        requests.HTTPError: the service answered with an error status.
        RuntimeError: the reply parsed as JSON but carries no ``locations``.
    """
    post = {'f': 'pjson', 'outSR': 4326, 'addresses': build_address_query(rows)}
    req = requests.post(url, data=post)
    req.raise_for_status()
    reply = json.loads(req.text)
    if 'locations' not in reply:
        raise RuntimeError('geocoder reply has no locations: %r' % (reply,))
    return reply['locations']


def _coordinate(value):
    """Return *value* as a float, or None when it is missing or NaN."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return None if math.isnan(number) else number


def merge_locations(rows, locations):
    """Copy ``y``, ``x`` and ``Match_addr`` from *locations* into *rows*.

    *rows* is modified in place and also returned.  Each location is matched
    to its row through ``attributes.ResultID``, used as an index into *rows*.
    Every row receives all three keys, so the columns exist even for rows the
    geocoder did not answer.
    """
    for row in rows:
        row.setdefault('y', 0)
        row.setdefault('x', 0)
        row.setdefault('Match_addr', '')

    for loc in locations:
        attributes = loc['attributes']
        point = loc.get('location') or {}
        x = _coordinate(point.get('x'))
        y = _coordinate(point.get('y'))
        # hackish, might cause problems but keeps joins from erroring:
        # an unusable coordinate (e.g. 'NaN') parks the point at 0, 0
        if x is None or y is None:
            x = y = 0
        row = rows[attributes['ResultID']]
        row['y'] = y
        row['x'] = x
        row['Match_addr'] = attributes['Match_addr']
    return rows


def to_geodataframe(rows):
    """Build a point GeoDataFrame from rows that carry ``x`` and ``y``."""
    frame = pd.DataFrame(rows, columns=list(rows[0].keys()))
    # float64 keeps the full coordinate precision the geocoder returned
    points = gpd.points_from_xy(frame.x.astype('float64'), frame.y.astype('float64'))
    # the request asked for outSR 4326, so label the points accordingly
    return gpd.GeoDataFrame(frame, geometry=points, crs='EPSG:4326')


def add_boundary_columns(points, layer_paths=BOUNDARY_LAYERS):
    """Spatially join every layer in *layer_paths* onto *points*.

    The joins use sjoin's default join type, so a point that falls within no
    polygon of a layer is dropped from the result (presumably including the
    points parked at 0, 0).
    """
    for path in layer_paths:
        layer = gpd.read_file(path)
        try:
            points = gpd.sjoin(points, layer, predicate='within')
        except TypeError:
            # older GeoPandas releases only know the previous keyword name
            points = gpd.sjoin(points, layer, op='within')
        # the join's index column would collide with the next join's
        del points['index_right']
    return points


def main(argv=None):
    """Geocode the CSV named last in *argv* and write ``<name>-output.csv``."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit('usage: geocode.py addresses.csv')
    source = argv[-1]

    # read list of addresses
    rows = read_addresses(source)
    if not rows:
        sys.exit('no addresses found in %s' % source)

    # send request to state geocoder and parse response
    merge_locations(rows, geocode(rows))

    # convert to pandas and add the boundary columns
    located = add_boundary_columns(to_geodataframe(rows))

    # remove added geometry column
    table = located.drop(columns=['geometry'])

    # write pandas back to out csv
    table.to_csv(os.path.splitext(source)[0] + '-output.csv', index=False, header=True)


if __name__ == '__main__':
    main()