Geocoding Using NY Geocoder and Finding NY Political Districts Using R

Geocoding Using NY Geocoder and Finding NY Political Districts Using R

I re-wrote my Python script for coding political districts in NY using R and the ArcGIS Feature Services containing the political districts. This is a much more elegant approach then using Python, as it is self-contained and geocodes 750 records at a time, so large files that contain more then 1,000 records can be used with this script to be geocoded. You could just modify this script for purposes of getting coordinates, and skip the section that figures out what district persons live in.

library(tidyverse)
library(httr)
library(sf)

rm(list=ls())

# get district lines from nys gis
cd <- arcpullr::get_spatial_layer('https://gisservices.its.ny.gov/arcgis/rest/services/NYS_Congressional_Districts/FeatureServer/0')
sd <- arcpullr::get_spatial_layer('https://gisservices.its.ny.gov/arcgis/rest/services/NYS_Senate_Districts/FeatureServer/0')
ad <- arcpullr::get_spatial_layer('https://gisservices.its.ny.gov/arcgis/rest/services/NYS_Assembly_Districts/FeatureServer/0')

# read file
df <- openxlsx::read.xlsx('~/Downloads/addresses.xlsx') %>%
  janitor::clean_names()

# clean up data, remove " and return characters which will break the 
# json query and choke geocoder, convert to single line
df <- df %>% 
  mutate(
    'OBJECTID' = row_number(),
    'SINGLELINE' = paste(str_replace(address, '-\\d+ ', ' ') %>% str_replace_all("[\r\n]",'') %>% str_replace_all('"', '\\"') %>% str_trim(), 
                         str_replace(municipality,'\\(.*?\\)',''), state, zip_code, sep = ', '),
  )

# temporary tibble for storing geocoded addresses
res_df <- tibble()

# geocode 750 records at a time
for (i in seq(1, nrow(df), 750)) {
  
  # keep from over-running the dataframe
  j <- i + 749
  j <- ifelse(j > nrow(df), nrow(df), j) 

  # build json for query
  query <- str_c( 
                 '{ "attributes": { "OBJECTID": ', df[i:j, 'OBJECTID'],
                 ', "SINGLELINE": "', df[i:j, 'SINGLELINE'], 
                 '" } },', collapse = '\n')
  
  query <- str_c('{"records": [', query, ']}', sep='\n')
  
  post <- list( 'f' = 'pjson', 'outSR' = 4326, 'addresses' =  query )
  url <- 'https://gisservices.its.ny.gov/arcgis/rest/services/Locators/Street_and_Address_Composite/GeocodeServer/geocodeAddresses'
  
  # query state geocoder
  res <- POST(url, body = post, encode = "multipart")

  Sys.sleep(5)
  
  # extract from json the coordinates
  res_df <- bind_rows(res_df, 
                      jsonlite::fromJSON(content(res, as='text')) %>% 
                        as.data.frame() %>% 
                        as_tibble() %>%
                      janitor::clean_names() %>% 
                      unnest(cols = c(locations_location, locations_attributes)) %>%
                      select(OBJECTID = ResultID, matched_address = Match_addr, 
                             score = locations_score, lng = x, lat = y)
            )

}

# join the found coordinates from state geocoder to df
df <- df %>% left_join(res_df, by='OBJECTID')

# spatial join against cd, sd, ad
df <- df %>% 
  mutate(lat = ifelse(is.na(lat), 0, lat),
         lng = ifelse(is.na(lng), 0, lng)) %>%
  st_as_sf(coords = c("lng", "lat"),
                crs=4326) %>%
  st_intersection(cd) %>%
  st_drop_geometry() %>%
  select(OBJECTID, CD.2022 = DISTRICT, CD.2023.MEMBER = NAME) %>%
  left_join(df, ., by='OBJECTID')


df <- df %>% 
  mutate(lat = ifelse(is.na(lat), 0, lat),
         lng = ifelse(is.na(lng), 0, lng)) %>%
  st_as_sf(coords = c("lng", "lat"),
           crs=4326) %>%
  st_intersection(sd) %>%
  st_drop_geometry() %>%
  select(OBJECTID, SD.2022 = DISTRICT, SD.2023.MEMBER = NAME) %>%
  left_join(df, ., by='OBJECTID')

df <- df %>% 
  mutate(lat = ifelse(is.na(lat), 0, lat),
         lng = ifelse(is.na(lng), 0, lng)) %>%
  st_as_sf(coords = c("lng", "lat"),
           crs=4326) %>%
  st_intersection(ad) %>%
  st_drop_geometry() %>%
  select(OBJECTID, AD.2022 = DISTRICT, AD.2023.MEMBER = NAME) %>%
  left_join(df, ., by='OBJECTID')

# output the excel spreadsheet
df %>%
  select(addresses:zip_code, CD.2022:AD.2023.MEMBER, score, lat, lng, matched_address) %>%
  openxlsx::write.xlsx('/tmp/geocoded_addresses.xlsx')

Leave a Reply

Your email address will not be published. Required fields are marked *