library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)
source("../R/utils.R")
counties = USAboundaries::us_counties()
conus_counties = counties %>%
filter(!state_name %in% c("Alaska", "Puerto Rico", "Hawaii")) %>%
st_transform(5070)
cent_counties = st_centroid(conus_counties)
cent_counties_u = st_union(cent_counties)
counties_uni = st_union(conus_counties)
mapview::npts(counties_uni)
## [1] 3229
plot(counties_uni)
counties_simp = ms_simplify(counties_uni, keep = .05)
mapview::npts(counties_simp)
## [1] 161
plot(counties_simp)
# tesselations
v_grid = st_voronoi(cent_counties_u) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n())
v_grid_int = st_intersection(v_grid, counties_simp)
mapview::npts(v_grid_int)
## [1] 21626
t_grid = st_triangulate(cent_counties_u) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n())
t_grid_int = st_intersection(t_grid, counties_simp)
mapview::npts(t_grid_int)
## [1] 25593
# coverage
sq_grid = st_make_grid(conus_counties, n = c(70, 50)) %>%
st_as_sf() %>%
mutate(id = 1:n())
hex_grid = st_make_grid(conus_counties, n = c(70, 50), square = FALSE) %>%
st_as_sf() %>%
mutate(id = 1:n())
Simplifying the grids reduced the points drastically. For example, for counties, the total amount of points went from 3229 to 161.
library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)
source("../R/utils.R")
plot_tess(hex_grid, "Hexagonal Coverage")
plot_tess(sq_grid, "Square Coverage")
plot_tess(t_grid_int, "Voroni Coverage")
plot_tess(v_grid_int, "Voroni Coverage")
plot_tess(conus_counties, "Counties")
library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)
sum_tess = function(sf_object, char){
areas = drop_units(set_units(st_area(sf_object), "km2"))
tess= data.frame(tesselation = char,
mean_sq = set_units(mean(areas), "km^2"),
total_feat = length(areas),
stan_dev = sd(areas),
total_area = set_units(sum(areas), "km^2"))
return(tess)
}
sum_tess(sf_object = v_grid_int, 'Voroni')
## tesselation mean_sq total_feat stan_dev total_area
## 1 Voroni 2525.425 [km^2] 3106 2886.661 7843970 [km^2]
sum_tess(sf_object = t_grid_int, 'triangulate')
## tesselation mean_sq total_feat stan_dev total_area
## 1 triangulate 1253.691 [km^2] 6195 1578.647 7766614 [km^2]
sum_tess(sf_object = sq_grid, "square")
## tesselation mean_sq total_feat stan_dev total_area
## 1 square 3819.376 [km^2] 2242 1.453724e-11 8563041 [km^2]
sum_tess(sf_object = hex_grid, "hexagon")
## tesselation mean_sq total_feat stan_dev total_area
## 1 hexagon 3763.052 [km^2] 2271 1.412376e-11 8545891 [km^2]
sum_tess(sf_object = conus_counties, "counties")
## tesselation mean_sq total_feat stan_dev total_area
## 1 counties 2521.745 [km^2] 3108 3404.325 7837583 [km^2]
library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)
tess_summary = bind_rows(
sum_tess(sf_object = v_grid_int, 'Voroni'),
sum_tess(sf_object = t_grid_int, 'Triangulation'),
sum_tess(sf_object = sq_grid, "Square"),
sum_tess(sf_object = hex_grid, "Hexagon"),
sum_tess(sf_object = conus_counties, "Counties"))
knitr::kable(tess_summary, caption = "US Tesselations by County", col.names =
c("Tesselation", "Mean", "Total Number of Features", "Standard Deviation"
, "Total Area"))
Tesselation | Mean | Total Number of Features | Standard Deviation | Total Area |
---|---|---|---|---|
Voroni | 2525.425 [km^2] | 3106 | 2886.661 | 7843970 [km^2] |
Triangulation | 1253.691 [km^2] | 6195 | 1578.647 | 7766614 [km^2] |
Square | 3819.376 [km^2] | 2242 | 0.000 | 8563041 [km^2] |
Hexagon | 3763.052 [km^2] | 2271 | 0.000 | 8545891 [km^2] |
Counties | 2521.745 [km^2] | 3108 | 3404.325 | 7837583 [km^2] |
For the Voroni Tesselation, the total area is massive so it might affect it’s final result with the amount of calculations needed. For the triangulation tesselation, the mean is worrisome because it has the smallest mean out of the tesselations so it will standout. For the hexagonal tesselation, the final product when it is mapped well be deatiled because of the amount of land it covers. For the squar tesselation, the detail and coverage will, also, be fine. For the counties tesselation, it has been simplified so it is at a point where I reduced it to my liking.
library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)
dam = read_excel('../data/NID2019_U.xlsx')
dam_filt = dam %>%
filter(!is.na(LONGITUDE), !is.na(LATITUDE)) %>%
st_as_sf(coords = c('LONGITUDE', 'LATITUDE'), crs = 4326) %>%
st_transform(5070)
# step 3.2
peep = function(point, polygon, group) {
st_join(polygon, point) %>%
st_drop_geometry() %>%
count(get(group)) %>%
setNames(c(group, "n")) %>%
left_join(polygon, by = group) %>%
st_as_sf()
}
# step 3.3
v_peep = peep(dam_filt, v_grid_int, "id")
t_peep = peep(dam_filt, t_grid_int, "id")
sq_peep = peep(dam_filt, sq_grid, "id")
hex_peep = peep(dam_filt, hex_grid, "id")
conus_peep = peep(dam_filt, conus_counties, "geoid")
# step 3.4
plot_peep = function(object_sf, title) {
ggplot() +
geom_sf(data = object_sf, aes(fill = n), col = 'NA') +
scale_fill_viridis_c() +
theme_void() +
labs(title = title,
caption = paste0(sum(object_sf$n), " dams represented"))
}
library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)
plot_peep(hex_peep, "Hexagonal Grid: U.S. Dams")
plot_peep(sq_peep, "Square Grid: U.S. Dams")
plot_peep(v_peep, "Voroni Grid: U.S. Dams")
plot_peep(t_peep, "Triangulation Grid: U.S. Dams")
plot_peep(conus_peep, "U.S. Counties: Dams")
This relates the MAUP problem because each graph displays the same data in its own right. Meaning, each graph highlights and displays a standout factor in the data. I would choose the triangulation grid because of the amount of detail that comes with it.
library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)
dam_purp = dam_filt %>%
filter(grepl(c("P", "R", "S", "F"), PURPOSES))
dam_protect = dam_purp %>%
filter(grepl("P", PURPOSES))
dam_pro_hex = peep(dam_protect, hex_grid, "id")
dam_rec = dam_purp %>%
filter(grepl("R", PURPOSES))
dam_rec_hex = peep(dam_rec, hex_grid, "id")
dam_ws = dam_purp %>%
filter(grepl("S", PURPOSES))
dam_ws_hex = peep(dam_ws, hex_grid, "id")
dam_fad = dam_purp %>%
filter(grepl("F", PURPOSES))
dam_fad_hex = peep(dam_fad, hex_grid, "id")
I chose recreation as a purpose because I am curious to think how many dams there are available for our recreational use. Also, I chose wildlife and fishes because I would think that would be the only purpose for dams. On another note, I chose protection as a purpose because I was curious at the distribution of dams meant for the use of proection. Finally, I chose water supply as a purpose because I would think we would not be in a drought with all these dams but I was wrong.
library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)
plot_peep(dam_fad_hex, "Dam Purpose: Fish and Wildlife") +
gghighlight(n > (mean(n) + sd(n)))
plot_peep(dam_pro_hex, "Dam Purpose: Protect") +
gghighlight(n > (mean(n) + sd(n)))
plot_peep(dam_ws_hex, "Dam Purpose: Water Supply") +
gghighlight(n > (mean(n) + sd(n)))
plot_peep(dam_rec_hex, "Dam Purpose: Recreation") +
gghighlight(n > (mean(n) + sd(n)))
The geographic distribution of the dams makes sense becuase they are mainly centered in the central U.S. where there is not that much access to a water supply there. Also, I was surprised to see the amount of dams used fro protection in the Central U.S. because of how dry it looks compared to the states situated by the coast. The location and distribution of the dams is consistent with the type of climate found near those places; mostly dry and warm/cold.