This document contains links to the source code for this project.
Storage at UW CSDE “R:/Project/UWED/html”.
cat(readLines("../code/download_cvap_acs.R"), sep = "\n")
# Lauren Woyczynski< Phil Hurvitz
# 2024-01-17
# download US Census ACS CVAP data
# extract the Block Group tables from the zip files.
.libPaths('R:/Project/UWED/html/_site/site_libs/') #pull packages from the site_libs - also install packages here
library(curl)
library(tidyverse)
library(RPostgreSQL)
# db connection
source("R:/Project/UWED/code/dbconnect.R")
# This assumes we made a user-level environment variable "uwed_user_pgpassword" containing the plain text
# PostgreSQL password
U <- uwed <- connectdb(
dbname = "uwed",
host = "doyenne.csde.washington.edu",
user = "uwed_user",
password = Sys.getenv("uwed_user_pgpassword")
)
# a path to download to
myPath <- "R:/Project/UWED/data/CVAP_practice"
setwd(myPath)
# a wrapper----
f_wrapper_cvap <- function(){
f_download_cvap_1_all()
f_partmake()
f_unzip_cvap_all()
f_cvap_general_all()
f_cvap_2000_all()
}
# function to download the CVAP files for 2009-2020----
# this year
myYear <- Sys.Date() %>% strftime("%Y")
f_download_cvap_1_all <- function(){
for(i in 2009:myYear){
f_download_cvap_0_one(i)
}
}
f_download_cvap_0_one <- function(year){
# start and end years
end_year <- year %>% as.character()
start_year <- (year-4) %>% as.character()
# a URL
#"https://www2.census.gov/programs-surveys/decennial/rdo/datasets/2020/2020-cvap/CVAP_2016-2020_ACS_csv_files.zip"
url <- "https://www2.census.gov/programs-surveys/decennial/rdo/datasets/xEYx/xEYx-cvap/CVAP_xSYx-xEYx_ACS_csv_files.zip" %>%
str_replace_all(pattern = "xEYx", replacement = end_year) %>%
str_replace_all(pattern = "xSYx", replacement = start_year)
# file name
fname <- file.path(myPath, basename(url))
# download if necessary
if(!file.exists(fname)){
message(url)
message(paste(" ", fname))
try(curl_download(url = url, destfile = fname, quiet = TRUE))
}
}
# a partitioned table----
f_partmake <- function(overwrite = FALSE){
if(tExists(conn = U, table_schema = "public", table_name = "census_cvap_wa")){
if(!overwrite){
message("census_cvap_wa exists. use overwrite = TRUE?")
return(invisible())
} else {
dbGetQuery(conn = U, statement = "drop table census_cvap_wa;")
}
}
sql <- "create table census_cvap_wa (year int, geoid text, total int, white int, black int, aian int, asian int, nhpi int, other int, hispanic int, non_hispanic int, geom_4269 geometry(MultiPolygon,4269), geom_2927 geometry(MultiPolygon,2927), primary key (year,geoid)) partition by list(year);
create index idx_census_cvap_wa_year on census_cvap_wa using btree(year);
create index idx_census_cvap_wa_4269 on census_cvap_wa using gist(geom_4269);
create index idx_census_cvap_wa_2927 on census_cvap_wa using gist(geom_2927);
"
O <- dbGetQuery(conn = U, statement = sql)
}
# pull the block group file from each zip file----
myZipFiles <- list.files(path = myPath, pattern = ".*.zip", full.names = TRUE)
# a function to process the 2009 onward CVAP files----
f_unzip_cvap <- function(fname){
# terminal year
tyear <- fname %>% str_remove_all(pattern = ".*-") %>% str_remove_all(pattern = "_ACS.*")
# unzip the BG data
# x <- unzip(zipfile = i, list = TRUE)
# handle folder structure
if(tyear < 2011){
bgfname <- "CVAP Files/BlockGr.csv"
} else {
bgfname <- "BlockGr.csv"
}
# select only WA
# rename
outfname <- paste0(tyear, "_BlockGr.csv")
if(!file.exists(outfname)){
message(paste("unzipping", bgfname, "from", fname))
unzip(zipfile = fname, files = bgfname, junkpaths = TRUE, exdir = myPath)
file.rename(from = "BlockGr.csv", to = outfname)
}
}
# a function to unzip all files
f_unzip_cvap_all <- function(){
for(i in myZipFiles){
f_unzip_cvap(i)
}
}
# a function to process one file
f_cvap_general <- function(year, overwrite = FALSE){
# year as text
tyear <- year %>% as.character()
# geographic unit
gunit <- case_when(
year %in% 2010:2012 ~ 'trt',
TRUE ~ 'bg')
# first check to see if the table exists
if(tExists(conn = U, table_schema = "detail_census_wa", table_name = paste0("census_cvap_wa_", tyear))){
if(!overwrite){
message(paste0("detail_census_wa.census_cvap_wa_", tyear, " exists. use overwrite = TRUE?"))
return(invisible())
} else {
dbGetQuery(conn = U, statement = paste0("drop table detail_census_wa.census_cvap_wa_", tyear))
}
}
myCSVs <- list.files(path = myPath, pattern = ".*Block.*csv")
fname <- str_subset(string = myCSVs, pattern = year %>% as.character())
message("reading")
x <- read.csv(fname)
colnames(x) %<>% str_to_lower()
message("mutating")
y <- x %>%
# get WA
filter(
str_detect(string = geoid, pattern = "US53")
) %>%
# a better geoid
mutate(
geoid = str_replace_all(string = geoid, pattern = ".*US", replacement = "")
) %>%
# handle tract vs bg by year
mutate(
geoid = case_when(
tyear %in% 2009:2012 ~ str_sub(string = geoid, start = 1, end = 11),
TRUE ~ geoid
)
) %>%
# named values
mutate(race =
case_when(
lntitle == 'Total' ~ 'total',
lntitle == 'Not Hispanic or Latino' ~ 'non_hispanic',
lntitle == 'American Indian or Alaska Native Alone' ~ 'aian',
lntitle == 'Asian Alone' ~ 'asian',
lntitle == 'Black or African American Alone' ~ 'black',
lntitle == 'Native Hawaiian or Other Pacific Islander Alone' ~ 'nhpi',
lntitle == 'White Alone' ~ 'white',
lntitle == 'American Indian or Alaska Native and White' ~ 'other',
lntitle == 'Asian and White' ~ 'other',
lntitle == 'Black or African American and White' ~ 'other',
lntitle == 'American Indian or Alaska Native and Black or African American' ~ 'other',
lntitle == 'Remainder of Two or More Race Responses' ~ 'other',
lntitle == 'Hispanic or Latino' ~ 'hispanic'
),
year = tyear %>% as.integer()
) %>%
# aggregate "other"
group_by(geoid, race, year) %>%
summarise(n = sum(cvap_est),
.groups = "drop") %>%
# widen
pivot_wider(id_cols = c(year, geoid), names_from = race, values_from = n) %>%
# order
select(year, geoid, total, white, black, aian, asian, nhpi, other, hispanic, non_hispanic)
# write to db temp
message("writing")
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists tmp.cvap_", year))
O <- dbWriteTable(conn = U, name = c("tmp", paste0("cvap_", tyear)), value = y, row.names = FALSE)
# make the actual table with census geog
message("creating spatial table")
sql_create_table <- "create table detail_census_wa.census_cvap_wa_xYRx partition of census_cvap_wa for values in (xYRx);
with c as
(select * from tmp.cvap_xYRx)
, a as (select geoid, geom_2927, geom_4269 from detail_census_wa.census_wa_xYRx_xGEOx)
, j as (select year,geoid,total,white,black,aian,asian,nhpi,other,hispanic,non_hispanic,geom_4269,geom_2927
from c
left join a using (geoid))
insert into census_cvap_wa (year,geoid,total,white,black,aian,asian,nhpi,other,hispanic,non_hispanic,geom_4269,geom_2927)
select * from j on conflict do nothing;" %>%
str_replace_all(pattern = "xYRx", (year %>% as.character())) %>%
str_replace_all(pattern = "xGEOx", gunit)
O <- dbGetQuery(conn = U, statement = sql_create_table)
# update geom
O <- dbGetQuery(conn = U, statement = "update census_cvap_wa set geom_2927 = st_transform(geom_4269, 2927) where geom_2927 is null;")
}
f_cvap_general_all <- function(overwrite = FALSE){
for (y in 2009:2021){
f_cvap_general(year = y, overwrite = overwrite)
}
}
# deal with the 2000 CVAP
f_cvap_2000 <- function(year = 2000, overwrite = FALSE){
tyear <- year %>% as.character()
# first check to see if the table exists
if(tExists(conn = U, table_schema = "detail_census_wa", table_name = paste0("census_cvap_wa_", tyear))){
if(!overwrite){
message("census_cvap_wa exists. use overwrite = TRUE?")
return(invisible())
} else {
dbGetQuery(conn = U, statement = paste0("drop table detail_census_wa.census_cvap_wa_", tyear))
}
}
layout <- read.csv("R:/Project/UWED/data/CVAP/stp_76/column_layout.csv", header = FALSE) %>% as_tibble()
widths <- layout %>% pull(V1) %>% str_split(pattern = "-", simplify = TRUE) %>% as.data.frame() %>%
mutate(V1 = as.integer(V1),
V2 = as.integer(V2),
V2 = case_when(is.na(V2) ~ V1,
TRUE ~ V2))
# read the fixed width file
cvap_2000 <- read_fwf(file = "R:/Project/UWED/data/CVAP/stp_76/Data Files/stp76-53.txt",
col_positions = fwf_positions(widths$V1, widths$V2, layout$V2))
# some processing
cvap_2000 %<>%
# convert to integer
mutate_if(is.numeric, as.integer) %>%
# only BGs
filter(!is.na(bg)) %>%
# geoid
mutate(geoid = str_c(statefips, countyfips, tract, bg, sep = ""),
aian = NA_integer_, nhpi = NA_integer_,
non_hispanic = pop18 - hispanic_total,
year = year) %>%
# final format to match the other files
select(year, geoid, total = pop18, white, black, aian, asian, nhpi, other, hispanic = hispanic_total, non_hispanic)
# write to db
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists tmp.cvap_", tyear))
O <- dbWriteTable(conn = U, name = c("tmp", paste0("cvap_", tyear)), value = cvap_2000, row.names = FALSE)
# make the actual table with census geog
message("creating spatial table")
sql_create_table <- "create table detail_census_wa.census_cvap_wa_xYRx partition of census_cvap_wa for values in (xYRx);
with c as
(select * from tmp.cvap_xYRx)
,
a as (select geoid, geom_2927, geom_4269 from detail_census_wa.census_wa_xYRx_bg),
j as (select year,geoid,total,white,black,aian,asian,nhpi,other,hispanic,non_hispanic,geom_4269,geom_2927
from c
left join a using (geoid))
insert into census_cvap_wa (year,geoid,total,white,black,aian,asian,nhpi,other,hispanic,non_hispanic,geom_4269,geom_2927)
select * from j on conflict do nothing;" %>% str_replace_all(pattern = "xYRx", tyear)
O <- dbGetQuery(conn = U, statement = sql_create_table)
}
f_cvap_2000_all <- function(overwrite = FALSE){
for(y in 2000:2008){
f_cvap_2000(year = y, overwrite = overwrite)
}
}
#Run it!
f_wrapper_cvap()
cat(readLines("../code/precinct_shape_to_postgis.R"), sep = "\n")
# standardize the precinct shape files to a PostGIS database
# Phil Hurvitz 20221103
library(pacman)
pacman::p_load(tidyverse, tidycensus, sf, magrittr, tigris, readxl)
source("R:/Project/UWED/code/dbconnect.R")
# This assumes we made a user-level environment variable "uwed_user_pgpassword" containing the plain text
# PostgreSQL password
U <- uwed <- connectdb(
dbname = "uwed",
host = "doyenne.csde.washington.edu",
user = "uwed_user",
password = Sys.getenv("uwed_user_pgpassword")
)
# county codes
wa_fips <- fips_codes %>%filter(state == "WA") %>%
mutate(county = str_replace_all(string = county, pattern = " County", ""))
wa_county_codes <- read.csv("R:/Project/UWED/data/shapefiles/wa_counties_codes.csv")
wa <- wa_fips %>% left_join(wa_county_codes, by = "county") %>%
select(countyfips = county_code, countyid, county)
wa2letter <- dbGetQuery(conn = U, statement = "select distinct countycode, countyid from public.voting_history order by countyid;")
wa %<>% left_join(wa2letter, by = "countyid")
# list all of the zip files
zipfnames <- list.files(path = "r:/Project/UWED/data/shapefiles", pattern = ".*zip", full.names = TRUE) %>%
str_subset(pattern = "splits", negate = TRUE)
# make a temp dir
td <- tempdir()
# make another dir
uzdir <- file.path(td, "s")
if(!dir.exists(uzdir)){
dir.create(path = uzdir)
}
# shape file dir
sfdir <- "R:/Project/UWED/data/shapefiles"
# a schema to make to hold each year precinct shape
dirExists <- dbGetQuery(conn = U, statement = "select count(*) = 1 as direxists from pg_catalog.pg_namespace where nspname = 'detail_precinct_shape';")$direxists
if(!dirExists){
O <- dbGetQuery(conn = U, statement = "create schema detail_precinct_shape;")
}
# a wrapper
f_wrapper_precint <- function(){
f_precinct_structure()
f_run_precinct_2004_2010()
f_precinct_2012_2013(2012)
f_precinct_2012_2013(2013)
#f_precinct_2014_2017()
f_run_precinct_2014_2017()
f_precinct_2018()
f_precinct_2019()
f_precinct_2020()
f_precinct_2021()
f_precinct_2022()
}
# now create the precinct file
f_precinct_structure <- function() {
sql <- "
create schema if not exists tmp;
drop table if exists precinct cascade;
create table precinct(year integer,
statefips text,
countyfips text,
precinctname text,
countyid integer,
county text,
countycode text,
geom_2927 geometry(multipolygon, 2927),
unique(year, countyfips, precinctname)
) partition by list(year);
create index idx_precinct on precinct using gist(geom_2927);
"
O <- dbGetQuery(conn = U, statement = sql)
}
f_precinct_2004_2010 <- function(year){
year <- year %>% as.character()
# zip filename, etc
zipfn <- zipfnames %>% str_subset(pattern = year)
shapefn <- str_c(".*", year, ".*shp$")
tname <- str_c("detail_precinct_shape.precinct_", year)
# unzip
message(paste("unzipping", zipfn))
unzip(zipfile = zipfn, exdir = uzdir)
# list
fnshp <- list.files(uzdir, pattern = shapefn, full.names = TRUE)
# read the shapefile
message(paste("reading", shapefn))
x <- st_read(fnshp, quiet = TRUE)
# missing CRS?
if(st_crs(x) %>% is.na()) {
st_crs(x) <- 4269
}
# various changes
message(paste("mutation"))
x %<>%
rename_all(., .funs = tolower) %>% # lowercase column names
rename_with(~gsub("\\d+", "", .)) %>% # remove digits from column names
mutate(year = year %>% as.integer()) %>% # year as integer
mutate(name = str_to_lower(name)) %>% # lowercase precinct names
# select and rename some columns
select(year, statefips = statefp, countyfips = countyfp, precinctname = name, geom_2927 = geometry) %>%
left_join(y = wa, by = "countyfips") %>% # join with administrative codes
st_transform(2927) %>% # transform to WA SPS
st_set_geometry("geom_2927")
# write to db
message(paste("writing to db"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists detail_precinct_shape.precinct_", year, ";"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists tmp.precinct_", year, ";"))
O <- dbWriteTable(conn = U, name = c("tmp", paste0("precinct_", year)), value = x)
O <- dbGetQuery(conn = U,
statement = paste0("alter table tmp.precinct_", year, " alter column geom_2927 set data type geometry(multipolygon, 2927);"))
message(paste("repairing geoms"))
# repair geoms
O <- dbGetQuery(conn = U, statement = paste0("update tmp.precinct_", year, " set geom_2927 = st_multi(st_collectionextract(st_makevalid(geom_2927, 'method=linework'),3));"))
# union by precinct name
message(paste("unioning"))
sqlU <- paste0("create table detail_precinct_shape.precinct_", year, " as select year, statefips, countyfips, precinctname, countyid, county, countycode, st_multi(st_union(geom_2927))::geometry(multipolygon, 2927) as geom_2927 from tmp.precinct_", year, " group by year, statefips, countyfips, precinctname, countyid, county, countycode;")
O <- dbGetQuery(conn = U, statement = sqlU)
# add to partition
O <- dbGetQuery(conn = U, statement = paste("alter table precinct attach partition", tname, "for values in (", year, ");"))
}
f_run_precinct_2004_2010 <- function(years = 2004:2010){
for(i in years){
f_precinct_2004_2010(i)
}
}
# 2011 is missing
# 2012 and 2013
f_precinct_2012_2013 <- function(year = 2012){
year <- year %>% as.character()
# zip filename, etc
zipfn <- zipfnames %>% str_subset(pattern = year)
shapefn <- str_c(".*", year, ".*shp$")
tname <- str_c("detail_precinct_shape.precinct_", year)
# unzip
message(paste("unzipping", zipfn))
unzip(zipfile = zipfn, exdir = uzdir)
# list
fnshp <- list.files(uzdir, pattern = shapefn, full.names = TRUE)
# read the shapefile
message(paste("reading", shapefn))
x <- st_read(fnshp, quiet = TRUE)
# missing CRS?
if(st_crs(x) %>% is.na()) {
st_crs(x) <- 4269
}
# various changes
message(paste("mutation"))
x %<>%
rename_all(., .funs = tolower) %>% # lowercase column names
rename_with(~gsub("\\d+", "", .)) %>% # remove digits from column names
mutate(year = year %>% as.integer()) %>% # year as integer
mutate(name = str_to_lower(name),
statefips = "53") %>% # lowercase precinct names
# select and rename some columns
select(year, statefips, countycode, precinctname = name, geom_2927 = geometry) %>%
left_join(y = wa, by = "countycode") %>% # join with administrative codes
st_transform(2927) %>% # transform to WA SPS
st_set_geometry("geom_2927")
# write to db
message(paste("writing to db"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists detail_precinct_shape.precinct_", year, ";"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists tmp.precinct_", year, ";"))
O <- dbWriteTable(conn = U, name = c("tmp", paste0("precinct_", year)), value = x)
O <- dbGetQuery(conn = U,
statement = paste0("alter table tmp.precinct_", year, " alter column geom_2927 set data type geometry(multipolygon, 2927);"))
message(paste("repairing geoms"))
# repair geoms
O <- dbGetQuery(conn = U, statement = paste0("update tmp.precinct_", year, " set geom_2927 = st_multi(st_collectionextract(st_makevalid(geom_2927),3)) where not st_isvalid(geom_2927);"))
# union by precinct name
message(paste("unioning"))
sqlU <- paste0("create table detail_precinct_shape.precinct_", year, " as select year, statefips, countyfips, precinctname, countyid, county, countycode, st_multi(st_union(geom_2927))::geometry(multipolygon, 2927) as geom_2927 from tmp.precinct_", year, " group by year, statefips, countyfips, precinctname, countyid, county, countycode;")
O <- dbGetQuery(conn = U, statement = sqlU)
# add to partition
O <- dbGetQuery(conn = U, statement = paste("alter table precinct attach partition", tname, "for values in (", year, ");"))
}
# 2014
f_precinct_2014_2017 <- function(year = 2014){
year <- year %>% as.character()
# zip filename, etc
zipfn <- zipfnames %>% str_subset(pattern = year)
shapefn <- str_c(".*", year, ".*shp$")
tname <- str_c("detail_precinct_shape.precinct_", year)
# unzip
message(paste("unzipping", zipfn))
unzip(zipfile = zipfn, exdir = uzdir)
# list
fnshp <- list.files(uzdir, pattern = shapefn, full.names = TRUE)
# read the shapefile
message(paste("reading", shapefn))
x <- st_read(fnshp, quiet = TRUE)
# missing CRS?
if(st_crs(x) %>% is.na()) {
st_crs(x) <- 4269
}
# various changes
message(paste("mutation"))
x %<>%
rename_all(., .funs = tolower) %>% # lowercase column names
rename_with(~gsub("\\d+", "", .)) # remove digits from column names
x %<>%
mutate(year = year %>% as.integer()) %>% # year as integer
mutate(name = str_to_lower(precname),
statefips = "53") %>% # lowercase precinct names
# select and rename some columns
select(year, statefips, countycode, precinctname = name, geom_2927 = geometry) %>%
left_join(y = wa, by = "countycode") %>% # join with administrative codes
st_transform(2927) %>% # transform to WA SPS
st_set_geometry("geom_2927")
# write to db
message(paste("writing to db"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists detail_precinct_shape.precinct_", year, ";"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists tmp.precinct_", year, ";"))
O <- dbWriteTable(conn = U, name = c("tmp", paste0("precinct_", year)), value = x)
O <- dbGetQuery(conn = U,
statement = paste0("alter table tmp.precinct_", year, " alter column geom_2927 set data type geometry(multipolygon, 2927);"))
message(paste("repairing geoms"))
# repair geoms
O <- dbGetQuery(conn = U, statement = paste0("update tmp.precinct_", year, " set geom_2927 = st_multi(st_collectionextract(st_makevalid(geom_2927),3)) where not st_isvalid(geom_2927);"))
# union by precinct name
message(paste("unioning"))
sqlU <- paste0("create table detail_precinct_shape.precinct_", year, " as select year, statefips, countyfips, precinctname, countyid, county, countycode, st_multi(st_union(geom_2927))::geometry(multipolygon, 2927) as geom_2927 from tmp.precinct_", year, " group by year, statefips, countyfips, precinctname, countyid, county, countycode;")
O <- dbGetQuery(conn = U, statement = sqlU)
# add to partition
O <- dbGetQuery(conn = U, statement = paste("alter table precinct attach partition", tname, "for values in (", year, ");"))
}
f_run_precinct_2014_2017 <- function(years = 2014:2017){
for(i in years){
f_precinct_2014_2017(i)
}
}
# 2018 does not have precinct names
f_precinct_2018 <- function(year = 2018){
# precinct name lookups
pnl <- read_excel("R:/Project/UWED/data/shapefiles/PrecinctName_Lookup_2018Gen.xlsx") %>%
rename_all(., .funs = tolower) %>% # lowercase column names
select(countycode, precinctcode, precinctname)
year <- year %>% as.character()
# zip filename, etc
zipfn <- zipfnames %>% str_subset(pattern = year)
shapefn <- str_c(".*", year, ".*shp$")
tname <- str_c("detail_precinct_shape.precinct_", year)
# unzip
message(paste("unzipping", zipfn))
unzip(zipfile = zipfn, exdir = uzdir)
# list
fnshp <- list.files(uzdir, pattern = shapefn, full.names = TRUE)
# read the shapefile
message(paste("reading", shapefn))
x <- st_read(fnshp, quiet = TRUE)
# missing CRS?
if(st_crs(x) %>% is.na()) {
st_crs(x) <- 4269
}
assign(x = "shp", value = x, envir = .GlobalEnv)
# various changes
message(paste("mutation"))
x %<>%
rename_all(., .funs = tolower) %>% # lowercase column names
rename_with(~gsub("\\d+", "", .)) %>% # remove digits from column names
mutate(year = year %>% as.integer(),
statefips = "53") %>% # state FIPS
# select and rename some columns
select(year, statefips, countycode = countycd, precinctcode = prccode, geom_2927 = geometry)
x %<>%
mutate(precinctcode = precinctcode %>%str_remove_all(pattern = "[[:punct:]]") %>% as.integer()) %>%
left_join(pnl, by = c("countycode", "precinctcode")) %>%
left_join(y = wa, by = "countycode") %>% # join with administrative codes
st_transform(2927) %>% # transform to WA SPS
st_set_geometry("geom_2927") %>% # geom column name
mutate(precinctname = str_to_lower(precinctname))
# write to db
message(paste("writing to db"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists detail_precinct_shape.precinct_", year, ";"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists tmp.precinct_", year, ";"))
O <- dbWriteTable(conn = U, name = c("tmp", paste0("precinct_", year)), value = x)
O <- dbGetQuery(conn = U,
statement = paste0("alter table tmp.precinct_", year, " alter column geom_2927 set data type geometry(multipolygon, 2927);"))
message(paste("repairing geoms"))
# repair geoms
O <- dbGetQuery(conn = U, statement = paste0("update tmp.precinct_", year, " set geom_2927 = st_multi(st_collectionextract(st_makevalid(geom_2927),3)) where not st_isvalid(geom_2927);"))
# union by precinct name
message(paste("unioning"))
sqlU <- paste0("create table detail_precinct_shape.precinct_", year, " as select year, statefips, countyfips, precinctname, countyid, county, countycode, st_multi(st_union(geom_2927))::geometry(multipolygon, 2927) as geom_2927 from tmp.precinct_", year, " group by year, statefips, countyfips, precinctname, countyid, county, countycode;")
O <- dbGetQuery(conn = U, statement = sqlU)
# add to partition
O <- dbGetQuery(conn = U, statement = paste("alter table precinct attach partition", tname, "for values in (", year, ");"))
# x
}
# 2019 does not have precinct names, or alpha county codes!
f_precinct_2019 <- function(year = 2019){
year <- year %>% as.character()
# zip filename, etc
zipfn <- "R:/Project/UWED/data/shapefiles/statewide_splits_2019general.zip"
shapefn <- str_c(".*", year, ".*shp$")
tname <- str_c("detail_precinct_shape.precinct_", year)
# unzip
message(paste("unzipping", zipfn))
unzip(zipfile = zipfn, exdir = uzdir)
# list
fnshp <- list.files(uzdir, pattern = shapefn, full.names = TRUE)
# read the shapefile
message(paste("reading", shapefn))
x <- st_read(fnshp, quiet = TRUE)
# missing CRS?
if(st_crs(x) %>% is.na()) {
st_crs(x) <- 4269
}
# there are no precinct names, but there are precinct codes
pcodes <- dbGetQuery(conn = U, statement = "select distinct on (countycode, precinctcode) countycode, precinctname, precinctcode from public.voting_history where electionyear = 2019 and precinctcode <> -1 order by countycode, precinctcode;")
assign(x = "shp", value = x, envir = .GlobalEnv)
# county name
wa_x <- wa %>% mutate(countyname = county)
# various changes
message(paste("mutation"))
x %<>%
rename_all(., .funs = tolower) %>% # lowercase column names
rename_with(~gsub("\\d+", "", .)) %>% # remove digits from column names
mutate(year = year %>% as.integer(),
statefips = "53") %>% # state FIPS
# select and rename some columns
select(year, statefips, countyname, precinctcode = preccode, geom_2927 = geometry) %>%
mutate(precinctcode = precinctcode %>% as.integer()) %>%
select(county = countyname, everything())
# joins
x %<>%
left_join(y = wa_x, by = "county") %<>% # WA
left_join(pcodes, by = c("countycode", "precinctcode"))
# transform
x %<>% # join with administrative codes
st_transform(2927) %>% # transform to WA SPS
st_set_geometry("geom_2927") %>% # geom column name
mutate(precinctname = str_to_lower(precinctname)) %>%
select(year, statefips, countyfips, precinctname, countyid, county = countyname, countycode, geom_2927)
# write to db
message(paste("writing to db"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists detail_precinct_shape.precinct_", year, ";"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists tmp.precinct_", year, ";"))
O <- dbWriteTable(conn = U, name = c("tmp", paste0("precinct_", year)), value = x)
O <- dbGetQuery(conn = U,
statement = paste0("alter table tmp.precinct_", year, " alter column geom_2927 set data type geometry(multipolygon, 2927);"))
message(paste("repairing geoms"))
# repair geoms
O <- dbGetQuery(conn = U, statement = paste0("update tmp.precinct_", year, " set geom_2927 = st_multi(st_collectionextract(st_makevalid(geom_2927),3)) where not st_isvalid(geom_2927);"))
# union by precinct name
message(paste("unioning"))
sqlU <- paste0("create table detail_precinct_shape.precinct_", year, " as select year, statefips, countyfips, precinctname, countyid, county, countycode, st_multi(st_union(geom_2927))::geometry(multipolygon, 2927) as geom_2927 from tmp.precinct_", year, " group by year, statefips, countyfips, precinctname, countyid, county, countycode;")
O <- dbGetQuery(conn = U, statement = sqlU)
# add to partition
O <- dbGetQuery(conn = U, statement = paste("alter table precinct attach partition", tname, "for values in (", year, ");"))
}
f_precinct_2020 <- function(year = 2020){
year <- year %>% as.character()
# zip filename, etc
zipfn <- zipfnames %>% str_subset(pattern = year)
shapefn <- str_c(".*", year, ".*shp$")
tname <- str_c("detail_precinct_shape.precinct_", year)
# unzip
message(paste("unzipping", zipfn))
unzip(zipfile = zipfn, exdir = uzdir)
# list
fnshp <- list.files(uzdir, pattern = shapefn, full.names = TRUE)
# read the shapefile
message(paste("reading", shapefn))
x <- st_read(fnshp, quiet = TRUE)
# missing CRS?
if(st_crs(x) %>% is.na()) {
st_crs(x) <- 4269
}
# various changes
message(paste("mutation"))
x %<>%
rename_all(., .funs = tolower) %>% # lowercase column names
rename_with(~gsub("\\d+", "", .)) %>% # remove digits from column names
mutate(year = year %>% as.integer()) %>% # year as integer
mutate(statefips = "53") %>% # lowercase precinct names
# select and rename some columns
select(year, statefips, county = countyname, precinctname = precname, geom_2927 = geometry) %>%
left_join(y = wa, by = "county") %>% # join with administrative codes
st_transform(2927) %>% # transform to WA SPS
st_set_geometry("geom_2927") %>%
mutate(precinctname = str_to_lower(precinctname)) %>%
select(year, statefips, countyfips, precinctname, countyid, county, countycode, geom_2927)
# write to db
message(paste("writing to db"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists detail_precinct_shape.precinct_", year, ";"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists tmp.precinct_", year, ";"))
O <- dbWriteTable(conn = U, name = c("tmp", paste0("precinct_", year)), value = x)
O <- dbGetQuery(conn = U,
statement = paste0("alter table tmp.precinct_", year, " alter column geom_2927 set data type geometry(multipolygon, 2927);"))
message(paste("repairing geoms"))
# repair geoms
O <- dbGetQuery(conn = U, statement = paste0("update tmp.precinct_", year, " set geom_2927 = st_multi(st_collectionextract(st_makevalid(geom_2927),3)) where not st_isvalid(geom_2927);"))
# union by precinct name
message(paste("unioning"))
sqlU <- paste0("create table detail_precinct_shape.precinct_", year, " as select year, statefips, countyfips, precinctname, countyid, county, countycode, st_multi(st_union(geom_2927))::geometry(multipolygon, 2927) as geom_2927 from tmp.precinct_", year, " group by year, statefips, countyfips, precinctname, countyid, county, countycode;")
O <- dbGetQuery(conn = U, statement = sqlU)
# add to partition
O <- dbGetQuery(conn = U, statement = paste("alter table precinct attach partition", tname, "for values in (", year, ");"))
}
f_precinct_2021 <- function(year = 2021){
year <- year %>% as.character()
# zip filename, etc
zipfn <- zipfnames %>% str_subset(pattern = year) %>% str_subset(pattern = "ok")
shapefn <- str_c(".*", year, ".*shp$")
tname <- str_c("detail_precinct_shape.precinct_", year)
# unzip
message(paste("unzipping", zipfn))
unzip(zipfile = zipfn, exdir = uzdir)
# list
fnshp <- list.files(uzdir, pattern = shapefn, full.names = TRUE)
# read the shapefile
message(paste("reading", shapefn))
x <- st_read(fnshp, quiet = TRUE)
# missing CRS?
if(st_crs(x) %>% is.na()) {
st_crs(x) <- 4269
}
# county is fips
wa_x <- wa %>% mutate(statecountyfips = str_c("53", countyfips))
# various changes
message(paste("mutation"))
x %<>%
rename_all(., .funs = tolower) %>% # lowercase column names
rename_with(~gsub("\\d+", "", .)) # remove digits from column names
x %<>%
rename(statecountyfips = county) %>%
mutate(year = year %>% as.integer()) %>% # year as integer
mutate(statefips = "53") %>% # lowercase precinct names
# select and rename some columns
select(year, statefips, statecountyfips, precinctname = precname, geom_2927 = geometry) %>%
left_join(y = wa_x, by = "statecountyfips") %>% # join with administrative codes
st_transform(2927) %>% # transform to WA SPS
st_set_geometry("geom_2927") %>%
mutate(precinctname = str_to_lower(precinctname)) %>%
select(year, statefips, countyfips, precinctname, countyid, county, countycode, geom_2927)
# write to db
message(paste("writing to db"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists detail_precinct_shape.precinct_", year, ";"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists tmp.precinct_", year, ";"))
O <- dbWriteTable(conn = U, name = c("tmp", paste0("precinct_", year)), value = x)
O <- dbGetQuery(conn = U,
statement = paste0("alter table tmp.precinct_", year, " alter column geom_2927 set data type geometry(multipolygon, 2927);"))
message(paste("repairing geoms"))
# repair geoms
O <- dbGetQuery(conn = U, statement = paste0("update tmp.precinct_", year, " set geom_2927 = st_multi(st_collectionextract(st_makevalid(geom_2927),3)) where not st_isvalid(geom_2927);"))
# union by precinct name
message(paste("unioning"))
sqlU <- paste0("create table detail_precinct_shape.precinct_", year, " as select year, statefips, countyfips, precinctname, countyid, county, countycode, st_multi(st_union(geom_2927))::geometry(multipolygon, 2927) as geom_2927 from tmp.precinct_", year, " group by year, statefips, countyfips, precinctname, countyid, county, countycode;")
O <- dbGetQuery(conn = U, statement = sqlU)
# add to partition
O <- dbGetQuery(conn = U, statement = paste("alter table precinct attach partition", tname, "for values in (", year, ");"))
}
f_precinct_2022 <- function(year = 2022){
year <- year %>% as.character()
# zip filename, etc
zipfn <- zipfnames %>% str_subset(pattern = year) %>% str_subset(pattern = "ok")
shapefn <- str_c(".*", year, ".*shp$")
tname <- str_c("detail_precinct_shape.precinct_", year)
# unzip
message(paste("unzipping", zipfn))
unzip(zipfile = zipfn, exdir = uzdir)
# list
fnshp <- list.files(uzdir, pattern = shapefn, full.names = TRUE)
# read the shapefile
message(paste("reading", shapefn))
x <- st_read(fnshp, quiet = TRUE)
# missing CRS?
if(st_crs(x) %>% is.na()) {
st_crs(x) <- 4269
}
# county is fips
wa_x <- wa %>% mutate(statecountyfips = str_c("53", countyfips))
# various changes
message(paste("mutation"))
x %<>%
rename_all(., .funs = tolower) %>% # lowercase column names
rename_with(~gsub("\\d+", "", .)) # remove digits from column names
x %<>%
rename(statecountyfips = county) %>%
mutate(year = year %>% as.integer()) %>% # year as integer
mutate(statefips = "53") %>% # lowercase precinct names
# select and rename some columns
select(year, statefips, statecountyfips, precinctname = precname, geom_2927 = geometry) %>%
left_join(y = wa_x, by = "statecountyfips") %>% # join with administrative codes
st_transform(2927) %>% # transform to WA SPS
st_set_geometry("geom_2927") %>%
mutate(precinctname = str_to_lower(precinctname)) %>%
select(year, statefips, countyfips, precinctname, countyid, county, countycode, geom_2927)
# write to db
message(paste("writing to db"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists detail_precinct_shape.precinct_", year, ";"))
O <- dbGetQuery(conn = U, statement = paste0("drop table if exists tmp.precinct_", year, ";"))
O <- dbWriteTable(conn = U, name = c("tmp", paste0("precinct_", year)), value = x)
O <- dbGetQuery(conn = U,
statement = paste0("alter table tmp.precinct_", year, " alter column geom_2927 set data type geometry(multipolygon, 2927);"))
message(paste("repairing geoms"))
# repair geoms
O <- dbGetQuery(conn = U, statement = paste0("update tmp.precinct_", year, " set geom_2927 = st_multi(st_collectionextract(st_makevalid(geom_2927),3)) where not st_isvalid(geom_2927);"))
# union by precinct name
message(paste("unioning"))
sqlU <- paste0("create table detail_precinct_shape.precinct_", year, " as select year, statefips, countyfips, precinctname, countyid, county, countycode, st_multi(st_union(geom_2927))::geometry(multipolygon, 2927) as geom_2927 from tmp.precinct_", year, " group by year, statefips, countyfips, precinctname, countyid, county, countycode;")
O <- dbGetQuery(conn = U, statement = sqlU)
# add to partition
O <- dbGetQuery(conn = U, statement = paste("alter table precinct attach partition", tname, "for values in (", year, ");"))
}
# this gets really messy. there is no standardization of format of table or geometry from file to file.
# these are the variables that represent precinct ID
s <- '
statewide_vtd_2004,vtdst04 xxyyyy xx = county FIPS left pad zeros, yyyy = precinct left pad zeros
statewide_vtd_2005,vtdst05 ditto
statewide_vtd_2006,vtdst06 ditto
statewide_vtd_2007,vtdst07 ditto
statewide_vtd_2008,vtdst08 ditto
statewide_vtd_2009,vtdst09 ditto
statewide_vtd_2010,vtdst10 ditto
statewide_prec_2012,code yyyy yyyy = precinct, left pad zeros
statewide_prec_2012,name sometimes is code, sometimes is a string
statewide_prec_2013,code yyyy, left pad zeros
statewide_prec_2013,name
statewide_prec_2014,preccode yyy no pad zeros
# these are from the 2015 data
as_prec_2012,code null values
as_prec_2012,name alphabetical
cu_prec_2012,code yyy pad zeros♪
cu_prec_2012,name alpha
cu_prec_2012,vtdst10 xxyyy xx = county, yyy = precinct left pad zeros
fe_prec_2012,vtdst10 xxyyy xx = county, yyy = precinct left pad zeros
ga_prec_2012,preccode yyy yyy = precinct
je_prec_2012,precincts yyy yyy = precinct
ks_prec_2012,precinct_1 yyyy
ks_prec_2012,precinct_c
ks_prec_2012,precinct_n
ma_prec_2012,elecprec yyy
ma_prec_2012,name
pa_prec_2012,p_name last word seems to be the precinct code
pa_prec_2012,district
pe_prec_2012,precinct_i this is a varchar that matches "precinctname" in the vote history, and strings are not identical, e.g., Diamond Lake East <-> Diamond Lk East
sj_prec_2012,code integer
sj_prec_2012,name
st_prec_2012,precinct
st_prec_2012,pricno integer
wk_prec_2012,precinct
wk_prec_2012,pre_num integer
wt_prec_2012,vtdst10 xxyyy xx = county, yyy = precinct
gy_prec_2013,prec_num yyy pad zeros
sm_prec_2013,precnct_nm
sm_prec_2013,precnct_no integer
dg_prec_2014,precinct integer
kt_prec_2014,precinct string hopefully matches precinctname from voting data
sp_prec_2014,precnum integer
ad_prec_2015,vtdst10 xxyyy xx = county, yyy = precinct
be_prec_2015,districtna presume this is OK, integer
ch_prec_2015,precinct integer
cm_prec_2015,district
cm_prec_2015,label
cm_prec_2015,label2
cm_prec_2015,prec_no integer
cr_prec_2015,precinct integer
cz_prec_2015,preccode integer
fr_prec_2015,district_c strip "PREC" to get integer
fr_prec_2015,district_t
fr_prec_2015,label
gr_prec_2015,code integer
gr_prec_2015,precinctnm
is_prec_2015,precinctna
is_prec_2015,precno integer
ki_prec_2015,name
ki_prec_2015,votdst integer left padded
kp_prec_2015,district integer
le_prec_2015,precinct
le_prec_2015,precinct_n integer
li_prec_2015,label integer
li_prec_2015,name
ok_prec_2015,precinct integer
pi_prec_2015,name
pi_prec_2015,precinct xxyyy there is a problem with ogc_fid 474
sk_prec_2015,prec_no
sk_prec_2015,precinct
sn_prec_2015,precinct large integer
sn_prec_2015,precinct_n
th_prec_2015,name
th_prec_2015,precinctnu integer padded zeros
wl_prec_2015,precinct string match with precinctname, strip last 3 characters, replace "CP #" with "COLLEGE PLACE " (note space)
wm_prec_2015,name integer
ya_prec_2015,code integer
statewide_prec_2016,preccode integer
statewide_prec_2017,preccode integer
2018precincts_verified,prccode integer
statewide_precincts_2020general,preccode integer
statewide_splits_2020general,preccode integer why are there splits?
statewide_precincts_2021general_ok_consol,preccode integer
statewide_splits_2021gen,preccode
'
cat(readLines("../code/conflate_precinct_census.sql"), sep = "\n")
-- Phil Hurvitz, 2022-12-09
-- combines precinct and census polygon data
-- estimates count and proportion of different races for each precinct
drop table if exists public.precinct_census;
create table public.precinct_census as
with
--intersection
i as (select p.year as pyear,
p.countycode,
p.precinctname,
c.year as cyear,
c.area_census_orig,
c.geoid,
c.total,
c.white,
c.black,
c.aian,
c.asian,
c.nhpi,
c.other,
--need to do the collectionextract trick, creating multipart polys
st_multi(st_collectionextract(st_intersection(p.geom_2927, c.geom_2927), 3))::geometry(multipolygon, 2927) as geom_2927
from
--precinct
(select * from precinct) as p,
--census
(select *, st_area(geom_2927) as area_census_orig from census_wa) as c
--where there is an intersection between the two layers and the year is the same
where st_intersects(p.geom_2927, c.geom_2927)
and p.year = c.year)
, f1 as (select *, st_area(geom_2927) as area_intersect, st_area(geom_2927) / area_census_orig as area_prop from i)
, f2 as (select *,
total * area_prop as total_prop,
white * area_prop as white_prop,
black * area_prop as black_prop,
aian * area_prop as aian_prop,
asian * area_prop as asian_prop,
nhpi * area_prop as nhpi_prop,
other * area_prop as other_prop
from f1)
select *
from f2
order by pyear, countycode, precinctname;
;
/* aggregate back to precincts */
drop table if exists precinct_census_agg;
create table precinct_census_agg as
--aggregate back to precincts
with f1 as (select pyear as year,
countycode,
precinctname,
array_agg(geoid) as geoid_agg,
sum(area_intersect) as area_intersect,
sum(area_prop) as area_agg,
sum(total_prop) as total_agg,
sum(white_prop) as white_agg,
sum(black_prop) as black_agg,
sum(aian_prop) as aian_agg,
sum(asian_prop) as asian_agg,
sum(nhpi_prop) as nhpi_agg,
sum(other_prop) as other_agg,
st_multi(st_union(geom_2927))::geometry(multipolygon, 2927) as geom_2927
from precinct_census
group by pyear, countycode, precinctname
)
-- join with voting data
, e as (select * from precinct)
--, j as (select * from )
select *
from f1;
--now join these counts with election results
drop table if exists precinct_census_agg_votes;
create table precinct_census_agg_votes as
with p as (select year as electionyear
, countycode
, precinctname
, round(total_agg::numeric, 1) as total
, round(white_agg::numeric, 1) as white
, round(black_agg::numeric, 1) as black
, round(aian_agg::numeric, 1) as aian
, round(asian_agg::numeric, 1) as asian
, round(nhpi_agg::numeric, 1) as nhpi
, round(other_agg::numeric, 1) as other
, geom_2927
from precinct_census_agg),
e as (select electionyear,
electiondate::date,
countycode,
precinctname,
ballotname,
ballotnamepartyname,
raceid,
votes
from voting_history),
-- aggregate arrays
ea as (select array_agg(e.electiondate)::text as electiondate
, array_agg(raceid)::text as raceid
, countycode
, array_agg(ballotname) ::text as ballotname
, array_agg(ballotnamepartyname)::text as ballotnamepartyname
, precinctname
, array_agg(votes)::text as votes
, electionyear
from e
group by countycode, precinctname, electionyear)
,
j as (select *
from p
left join ea using (electionyear, countycode, precinctname))
select electionyear,
countycode,
precinctname,
total as race_total,
white as race_white,
black as race_black,
aian as race_aian,
asian as race_asian,
nhpi as race_nhpi,
other as race_other,
electiondate,
raceid,
ballotname,
ballotnamepartyname,
votes,
geom_2927
from j
--order by electionyear, countycode, precinctname
;
--create another non-GIS table with each precinct's results in long form with race in wide form, i.e.,
/*
electiondate, year, countycode, precinct1, vote stuff, [race columns]
*/
drop table if exists precinct_census_agg_votes_nogeog;
create table precinct_census_agg_votes_nogeog as
with p as (select year as electionyear
, countycode
, precinctname
, round(total_agg::numeric, 1) as total
, round(white_agg::numeric, 1) as white
, round(black_agg::numeric, 1) as black
, round(aian_agg::numeric, 1) as aian
, round(asian_agg::numeric, 1) as asian
, round(nhpi_agg::numeric, 1) as nhpi
, round(other_agg::numeric, 1) as other
from precinct_census_agg),
e as (select electionyear,
electiondate::date,
countycode,
precinctname,
ballotname,
ballotnamepartyname,
raceid,
votes
from voting_history),
j as (select *
from e
left join p using (electionyear, countycode, precinctname))
select electionyear,
countycode,
precinctname,
total as race_total,
white as race_white,
black as race_black,
aian as race_aian,
asian as race_asian,
nhpi as race_nhpi,
other as race_other,
electiondate,
raceid,
ballotname,
ballotnamepartyname,
votes
from j
order by electionyear, countycode, precinctname;
--export
--\copy precinct_census_agg_votes_nogeog to 'R:/Project/UWED/results/precinct_census_agg_votes_nogeog.csv' with csv header;
-- Appendix -- helper stuff
/*
select 'sum(' || c || ') as ' || c || ','
from (select column_name as c from information_schema.columns where table_name = 'precinct_census') as foo;
select 'round (' || c || '::numeric, 1) as ' || c
from (select column_name as c from information_schema.columns where table_name = 'precinct_census_agg') as foo;
select 'array_agg (' || c || ') as ' || c
from (select column_name as c from information_schema.columns where table_name = 'precinct') as foo;
select column_name || ',' from information_schema.columns where table_name = 'precinct_census_agg_votes';
select *
from precinct_census;*/
with a as (select *,
st_transform(geom_2927, 4326) as geometry
from precinct_census_agg_votes
where electionyear = 2020),
b as (select *, st_centroid(geometry) as geom_cen from a),
c as (select electionyear,
countycode,
precinctname,
race_total,
race_white,
race_black,
race_aian,
race_asian,
race_nhpi,
race_other,
electiondate,
raceid,
ballotname,
ballotnamepartyname,
votes,
st_x(geom_cen) as lng,
st_y(geom_cen) as lat,
st_astext(geometry) as geometry
from b)
select *
from c;
Rendered at 2024-01-18 18:58:40.447507