R Script for Converting RPTL 1590 Reports into Excel Files
Real Property Tax Law 1590 requires that municipalities post their tax rolls, within 10 days of the proposed and final rolls being approved. Below is an R script that will extract the reports into a Shapefile for use in a GIS program or as a Excel Spreadsheet. Previously, I posted a PHP version of this script. Both versions can also be found on my GitHub.
This R script uses three R packages β tidyverse, pdftools and sf. It does not require any external dependencies and should work on Mac OS, Linux or Windows if you have free RStudio program installed.
You can install the required R packages using: lapply(c('tidyverse', 'pdftools', 'sf'), install.packages)
library(pdftools)
library(tidyverse)
library(sf)
pdf.text <- pdftools::pdf_text('knox_2022_final_roll.pdf')
tax.rolls <- pdf.text %>%
str_split('\\*{5,}') %>%
map(~str_trim(.))
tax.rolls <- unlist(tax.rolls, recursive = F)
tax.rolls <-
tax.rolls[str_length(tax.rolls) > 50 &
!grepl('STATE OF NEW YORK',
tax.rolls) ]
tax.rolls.formatted <-
tibble(text = tax.rolls) %>%
transmute(taxid = str_extract(text, '^.*?\\n.(.*?) ', group = 1),
property.address = str_extract(text, '^(.*?)(\\n| {2,})', group = 1),
owner = str_extract(text, '(.*?\n){2}(.*?) {2}', group=2),
acres = str_extract(text, 'ACRES *?((\\d|.)*) ', group = 1) %>%
parse_number,
property.type = str_extract(text, '^.*?\\n.*? {2,}(.*?) {2,}', group = 1),
school.district = str_extract(text, '(.*?\n){2}(.*?) {2,}(.*?) {2,}', group=3),
deed.book = str_extract(text, 'DEED BOOK(.*?)(\n| {6,})', group=1) %>% str_replace('\\s{2,}',' ') %>% str_trim,
owner.address = str_extract(text, '(.*?\n){3}(.*?) {2}', group=2),
owner.address2 = str_extract(text, '(.*?\n){4}(.*?) {2}', group=2),
owner.address3 = str_extract(text, '(.*?\n){5}(.*?) {2}', group=2),
land.assessment = str_extract(text, '(.*?\n){2}(.*?) {3,}(.*?) {2,}(.*?) {2,}', group=4) %>%
parse_number,
total.assessment = str_extract(text, 'TOWN.*?VALUE {4,}(.*?)\n', group=1) %>%
parse_number,
full.market.value = str_extract(text, 'FULL.*?MARKET.*?VALUE *?(.*?)(\\n|$)', group=1) %>%
parse_number,
town.taxable.value = str_extract(text, 'TOWN.*?TAXABLE.*?VALUE(.*?)\n', group = 1) %>%
parse_number,
city.taxable.value = str_extract(text, 'CITY.*?TAXABLE.*?VALUE(.*?)\n', group = 1) %>%
parse_number,
county.taxable.value = str_extract(text, 'COUNTY TAXABLE VALUE(.*?)\n', group = 1) %>%
parse_number,
school.taxable.value = str_extract(text, 'SCHOOL.*?TAXABLE.*?VALUE(.*?)\n', group = 1) %>%
parse_number,
ag.dist.law = paste(str_extract(text,
'(MAY BE SUBJECT TO PAYMENT)', group=1),
str_extract(text,
'(UNDER .*?) {2,}', group=1)
),
north = str_extract(text, 'NRTH-(\\d*)', group=1) %>%
parse_number %>% replace_na(0),
east = str_extract(text, 'EAST-(\\d*)', group=1) %>%
parse_number %>% replace_na(0),
) %>%
# ny long island crs 32014, ny east crs 32015, ny central crs 32016, ny west crs 32017
st_as_sf(coords=c('east','north'), crs=32015)
tax.rolls.formatted %>%
write_sf('/tmp/taxroll.shp')
tax.rolls.formatted %>%
write_sf('/tmp/taxroll.csv')