Question 1

library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)

source("../R/utils.R")

counties = USAboundaries::us_counties()

conus_counties = counties %>%
  filter(!state_name %in% c("Alaska", "Puerto Rico", "Hawaii")) %>%
  st_transform(5070)

cent_counties = st_centroid(conus_counties)

cent_counties_u = st_union(cent_counties)

counties_uni = st_union(conus_counties)

mapview::npts(counties_uni)
## [1] 3229
plot(counties_uni)

counties_simp = ms_simplify(counties_uni, keep = .05)

mapview::npts(counties_simp)
## [1] 161
plot(counties_simp)

# tesselations

v_grid = st_voronoi(cent_counties_u) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id = 1:n())

v_grid_int = st_intersection(v_grid, counties_simp)

mapview::npts(v_grid_int)
## [1] 21626
t_grid = st_triangulate(cent_counties_u) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id = 1:n())

t_grid_int = st_intersection(t_grid, counties_simp)

mapview::npts(t_grid_int)
## [1] 25593
# coverage

sq_grid = st_make_grid(conus_counties, n = c(70, 50)) %>%
  st_as_sf() %>%
  mutate(id = 1:n())

hex_grid = st_make_grid(conus_counties, n = c(70, 50), square = FALSE) %>%
  st_as_sf() %>%
  mutate(id = 1:n())

Simplifying the grids reduced the points drastically. For example, for counties, the total amount of points went from 3229 to 161.

Question 1: Plots

library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)

source("../R/utils.R")

plot_tess(hex_grid, "Hexagonal Coverage")

plot_tess(sq_grid, "Square Coverage")

plot_tess(t_grid_int, "Voroni Coverage")

plot_tess(v_grid_int, "Voroni Coverage")

plot_tess(conus_counties, "Counties")

Question 2.1-2.2

library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)

sum_tess = function(sf_object, char){

  areas = drop_units(set_units(st_area(sf_object), "km2"))

  tess= data.frame(tesselation = char,
                   mean_sq = set_units(mean(areas), "km^2"),
                   total_feat = length(areas),
                   stan_dev = sd(areas),
                   total_area = set_units(sum(areas), "km^2"))

  return(tess)
}

sum_tess(sf_object = v_grid_int, 'Voroni')
##   tesselation         mean_sq total_feat stan_dev     total_area
## 1      Voroni 2525.425 [km^2]       3106 2886.661 7843970 [km^2]
sum_tess(sf_object = t_grid_int, 'triangulate')
##   tesselation         mean_sq total_feat stan_dev     total_area
## 1 triangulate 1253.691 [km^2]       6195 1578.647 7766614 [km^2]
sum_tess(sf_object = sq_grid, "square")
##   tesselation         mean_sq total_feat     stan_dev     total_area
## 1      square 3819.376 [km^2]       2242 1.453724e-11 8563041 [km^2]
sum_tess(sf_object = hex_grid, "hexagon")
##   tesselation         mean_sq total_feat     stan_dev     total_area
## 1     hexagon 3763.052 [km^2]       2271 1.412376e-11 8545891 [km^2]
sum_tess(sf_object = conus_counties, "counties")
##   tesselation         mean_sq total_feat stan_dev     total_area
## 1    counties 2521.745 [km^2]       3108 3404.325 7837583 [km^2]

Question 2.3-2.5

library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)

tess_summary = bind_rows(
  sum_tess(sf_object = v_grid_int, 'Voroni'),
  sum_tess(sf_object = t_grid_int, 'Triangulation'),
  sum_tess(sf_object = sq_grid, "Square"),
  sum_tess(sf_object = hex_grid, "Hexagon"),
  sum_tess(sf_object = conus_counties, "Counties"))

knitr::kable(tess_summary, caption = "US Tesselations by County", col.names =
               c("Tesselation", "Mean", "Total Number of Features", "Standard Deviation"
                 , "Total Area"))
US Tesselations by County
Tesselation Mean Total Number of Features Standard Deviation Total Area
Voroni 2525.425 [km^2] 3106 2886.661 7843970 [km^2]
Triangulation 1253.691 [km^2] 6195 1578.647 7766614 [km^2]
Square 3819.376 [km^2] 2242 0.000 8563041 [km^2]
Hexagon 3763.052 [km^2] 2271 0.000 8545891 [km^2]
Counties 2521.745 [km^2] 3108 3404.325 7837583 [km^2]

For the Voroni Tesselation, the total area is massive so it might affect it’s final result with the amount of calculations needed. For the triangulation tesselation, the mean is worrisome because it has the smallest mean out of the tesselations so it will standout. For the hexagonal tesselation, the final product when it is mapped well be deatiled because of the amount of land it covers. For the squar tesselation, the detail and coverage will, also, be fine. For the counties tesselation, it has been simplified so it is at a point where I reduced it to my liking.

Question 3

library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)

dam = read_excel('../data/NID2019_U.xlsx')


dam_filt = dam %>%
  filter(!is.na(LONGITUDE), !is.na(LATITUDE)) %>%
  st_as_sf(coords = c('LONGITUDE', 'LATITUDE'), crs = 4326) %>%
  st_transform(5070)

# step 3.2

peep = function(point, polygon, group) {
    st_join(polygon, point) %>%
    st_drop_geometry() %>%
    count(get(group)) %>%
    setNames(c(group, "n")) %>%
    left_join(polygon, by = group) %>%
    st_as_sf()

}

# step 3.3

v_peep = peep(dam_filt, v_grid_int, "id")

t_peep = peep(dam_filt, t_grid_int, "id")

sq_peep = peep(dam_filt, sq_grid, "id")

hex_peep = peep(dam_filt, hex_grid, "id")

conus_peep = peep(dam_filt, conus_counties, "geoid")



# step 3.4

plot_peep = function(object_sf, title) {
  ggplot() +
    geom_sf(data = object_sf, aes(fill = n), col = 'NA') +
    scale_fill_viridis_c() +
    theme_void() +
    labs(title = title,
         caption = paste0(sum(object_sf$n), " dams represented"))
}

Question 3: Plots

library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)

plot_peep(hex_peep, "Hexagonal Grid: U.S. Dams")

plot_peep(sq_peep, "Square Grid: U.S. Dams")

plot_peep(v_peep, "Voroni Grid: U.S. Dams")

plot_peep(t_peep, "Triangulation Grid: U.S. Dams")

plot_peep(conus_peep, "U.S. Counties: Dams")

This relates the MAUP problem because each graph displays the same data in its own right. Meaning, each graph highlights and displays a standout factor in the data. I would choose the triangulation grid because of the amount of detail that comes with it.

Question 4

library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)

dam_purp = dam_filt %>%
  filter(grepl(c("P", "R", "S", "F"), PURPOSES))

dam_protect = dam_purp %>%
  filter(grepl("P", PURPOSES))

dam_pro_hex = peep(dam_protect, hex_grid, "id")

dam_rec = dam_purp %>%
  filter(grepl("R", PURPOSES))

dam_rec_hex = peep(dam_rec, hex_grid, "id")

dam_ws = dam_purp %>%
  filter(grepl("S", PURPOSES))

dam_ws_hex = peep(dam_ws, hex_grid, "id")

dam_fad = dam_purp %>%
  filter(grepl("F", PURPOSES))

dam_fad_hex = peep(dam_fad, hex_grid, "id")

I chose recreation as a purpose because I am curious to think how many dams there are available for our recreational use. Also, I chose wildlife and fishes because I would think that would be the only purpose for dams. On another note, I chose protection as a purpose because I was curious at the distribution of dams meant for the use of proection. Finally, I chose water supply as a purpose because I would think we would not be in a drought with all these dams but I was wrong.

Question 4: Plots

library(tidyverse)
library(sf)
library(rmapshaper)
library(mapview)
library(units)
library(readxl)
library(gghighlight)

plot_peep(dam_fad_hex, "Dam Purpose: Fish and Wildlife") +
  gghighlight(n > (mean(n) + sd(n)))

plot_peep(dam_pro_hex, "Dam Purpose: Protect") +
  gghighlight(n > (mean(n) + sd(n)))

plot_peep(dam_ws_hex, "Dam Purpose: Water Supply") +
  gghighlight(n > (mean(n) + sd(n)))

plot_peep(dam_rec_hex, "Dam Purpose: Recreation") +
  gghighlight(n > (mean(n) + sd(n)))

The geographic distribution of the dams makes sense becuase they are mainly centered in the central U.S. where there is not that much access to a water supply there. Also, I was surprised to see the amount of dams used fro protection in the Central U.S. because of how dry it looks compared to the states situated by the coast. The location and distribution of the dams is consistent with the type of climate found near those places; mostly dry and warm/cold.