Geocoding Using NY Geocoder and Finding NY Political Districts Using R

Geocoding Using NY Geocoder and Finding NY Political Districts Using R

I re-wrote my Python script for coding political districts in NY using R and the ArcGIS Feature Services containing the political districts. This is a much more elegant approach then using Python, as it is self-contained and geocodes 750 records at a time, so large files that contain more then 1,000 records can be used with this script to be geocoded. You could just modify this script for purposes of getting coordinates, and skip the section that figures out what district persons live in.

001library(tidyverse)
002library(httr)
003library(sf)
004 
005rm(list=ls())
006 
007# get district lines from nys gis
011 
012# read file
013df <- openxlsx::read.xlsx('~/Downloads/addresses.xlsx') %>%
014  janitor::clean_names()
015 
016# clean up data, remove " and return characters which will break the
017# json query and choke geocoder, convert to single line
018df <- df %>%
019  mutate(
020    'OBJECTID' = row_number(),
021    'SINGLELINE' = paste(str_replace(address, '-\\d+ ', ' ') %>% str_replace_all("[\r\n]",'') %>% str_replace_all('"', '\\"') %>% str_trim(),
022                         str_replace(municipality,'\\(.*?\\)',''), state, zip_code, sep = ', '),
023  )
024 
025# temporary tibble for storing geocoded addresses
026res_df <- tibble()
027 
028# geocode 750 records at a time
029for (i in seq(1, nrow(df), 750)) {
030   
031  # keep from over-running the dataframe
032  j <- i + 749
033  j <- ifelse(j > nrow(df), nrow(df), j)
034 
035  # build json for query
036  query <- str_c(
037                 '{ "attributes": { "OBJECTID": ', df[i:j, 'OBJECTID'],
038                 ', "SINGLELINE": "', df[i:j, 'SINGLELINE'],
039                 '" } },', collapse = '\n')
040   
041  query <- str_c('{"records": [', query, ']}', sep='\n')
042   
043  post <- list( 'f' = 'pjson', 'outSR' = 4326, 'addresses' =  query )
045   
046  # query state geocoder
047  res <- POST(url, body = post, encode = "multipart")
048 
049  Sys.sleep(5)
050   
051  # extract from json the coordinates
052  res_df <- bind_rows(res_df,
053                      jsonlite::fromJSON(content(res, as='text')) %>%
054                        as.data.frame() %>%
055                        as_tibble() %>%
056                      janitor::clean_names() %>%
057                      unnest(cols = c(locations_location, locations_attributes)) %>%
058                      select(OBJECTID = ResultID, matched_address = Match_addr,
059                             score = locations_score, lng = x, lat = y)
060            )
061 
062}
063 
064# join the found coordinates from state geocoder to df
065df <- df %>% left_join(res_df, by='OBJECTID')
066 
067# spatial join against cd, sd, ad
068df <- df %>%
069  mutate(lat = ifelse(is.na(lat), 0, lat),
070         lng = ifelse(is.na(lng), 0, lng)) %>%
071  st_as_sf(coords = c("lng", "lat"),
072                crs=4326) %>%
073  st_intersection(cd) %>%
074  st_drop_geometry() %>%
075  select(OBJECTID, CD.2022 = DISTRICT, CD.2023.MEMBER = NAME) %>%
076  left_join(df, ., by='OBJECTID')
077 
078 
079df <- df %>%
080  mutate(lat = ifelse(is.na(lat), 0, lat),
081         lng = ifelse(is.na(lng), 0, lng)) %>%
082  st_as_sf(coords = c("lng", "lat"),
083           crs=4326) %>%
084  st_intersection(sd) %>%
085  st_drop_geometry() %>%
086  select(OBJECTID, SD.2022 = DISTRICT, SD.2023.MEMBER = NAME) %>%
087  left_join(df, ., by='OBJECTID')
088 
089df <- df %>%
090  mutate(lat = ifelse(is.na(lat), 0, lat),
091         lng = ifelse(is.na(lng), 0, lng)) %>%
092  st_as_sf(coords = c("lng", "lat"),
093           crs=4326) %>%
094  st_intersection(ad) %>%
095  st_drop_geometry() %>%
096  select(OBJECTID, AD.2022 = DISTRICT, AD.2023.MEMBER = NAME) %>%
097  left_join(df, ., by='OBJECTID')
098 
099# output the excel spreadsheet
100df %>%
101  select(addresses:zip_code, CD.2022:AD.2023.MEMBER, score, lat, lng, matched_address) %>%
102  openxlsx::write.xlsx('/tmp/geocoded_addresses.xlsx')

Leave a Reply

Your email address will not be published. Required fields are marked *