surveilling-surveillance/analysis/results.Rmd
2021-05-20 13:22:04 -07:00

280 lines
7.6 KiB
Text

---
title: "results"
author: "Keniel Yao"
date: "4/26/2021"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r load-functions}
library(tidyverse)
library(sf)
library(glue)
library(tidycensus)
library(broom)
source(here::here('analysis', 'figures.R'))
theme_set(theme_bw(base_size = 14))
```
# Load data
```{r data}
df_pre <- read_csv(here::here("data", "cameras_2011-2015.csv")) %>%
mutate(period = "2011-2015")
df_post <- read_csv(here::here("data", "cameras_2015-2021.csv")) %>%
mutate(period = "2015-2021")
city_data <- read_csv(here::here("data", "city_metadata.csv"))
recall <- 0.63
```
# Figures
## Table 1: City metadata
```{r metadata}
city_data %>%
arrange(desc(type), desc(road_network_length_km)) %>%
transmute(
City = case_when(
city == "New York" ~ "New York City",
city == "Washington" ~ "Washington, D.C.",
TRUE ~ city
),
Population = formatC(round(population_census2010, -3), format = "d", big.mark=","),
`Area (sq. km)` = formatC(area_sqkm_census2010, format = "d", big.mark=","),
`Road length (km)` = formatC(road_network_length_km, format = "d", big.mark=",")
)
```
## Figure 5: Spatial distribution of sampled points
```{r sampled-points}
generate_sampled_point_map(df_post, "San Francisco")
generate_sampled_point_map(df_post, "Chicago")
generate_sampled_point_map(df_post, "New York")
```
## Table 3: Detection count, density and total camera estimates
```{r main-table}
bind_rows(
df_pre,
df_post
) %>%
group_by(city, period) %>%
summarize(
n_pano = n(),
n_detection = sum(camera_count)
) %>%
ungroup() %>%
estimate_detection_metrics(recall = recall) %>%
transmute(
rank = if_else(period == "2015-2021", est_detections_per_km, 0),
city = fct_reorder(city, - rank),
type,
period = if_else(period == "2015-2021", "2016-2020", period),
road_network_length_km = formatC(road_network_length_km, format = "d", big.mark=","),
m_per_pano = round(m_per_pano, 0),
n_detection,
est_detections_per_km = round(est_detections_per_km, 2),
se_detections_per_km = glue("({ round(se_detections_per_km, 2) })"),
est_detections = formatC(round(est_detections, -2), format = "d", big.mark=","),
se_detections = glue('({ formatC(round(se_detections, -2), format = "d", big.mark=",") })')
) %>%
pivot_wider(
id_cols = c(city, type, road_network_length_km, m_per_pano),
names_from = period,
values_from = c(n_detection, est_detections_per_km, se_detections_per_km, est_detections, se_detections)
) %>%
arrange(desc(type), city) %>%
mutate(
across(ends_with("2011-2015"), ~ str_replace_na(.x, "-")),
city = as.character(city)
) %>%
select(
city, road_network_length_km, m_per_pano,
`n_detection_2011-2015`, `n_detection_2016-2020`,
`est_detections_per_km_2011-2015`, `se_detections_per_km_2011-2015`,
`est_detections_per_km_2016-2020`, `se_detections_per_km_2016-2020`,
`est_detections_2011-2015`, `se_detections_2011-2015`,
`est_detections_2016-2020`, `se_detections_2016-2020`
)
```
## Figure 9: Maps of detected points
```{r detected-points}
generate_detected_point_map(df_post, "San Francisco")
generate_detected_point_map(df_post, "Chicago")
generate_detected_point_map(df_post, "New York")
```
## Figure 10: Pre-post estimated camera density
```{r density-plot}
df_post %>%
group_by(city, period) %>%
summarize(
n_pano = n(),
n_detection = sum(camera_count)
) %>%
ungroup() %>%
estimate_detection_metrics(recall = recall) %>%
mutate(
city = case_when(
city == "New York" ~ "New York City",
city == "Washington" ~ "Washington, D.C.",
T ~ city
),
type = factor(type, c("Global", "US")),
city = fct_reorder(city, est_detections_per_km)
) %>%
plot_camera_density(legend = FALSE)
```
## Figure 11: Zone identification rate
```{r annotate-race-data}
us_cities <- city_data %>%
filter(type == "US") %>%
pull(city)
df_post_w_race <- us_cities %>%
map_dfr(~ annotate_points_with_census(df_post, .x, "race")) %>%
st_drop_geometry() %>%
mutate(
city = case_when(
city == "New York" ~ "New York City",
city == "Washington" ~ "Washington D.C.",
TRUE ~ city
),
city = factor(
city,
c("New York City", "San Francisco", "Boston", "Chicago", "Philadelphia",
"Washington D.C.", "Los Angeles", "Baltimore", "Seattle", "Milwaukee")
),
zone_type = str_to_title(zone_type),
zone_type = factor(
zone_type,
c("Public", "Residential", "Industrial", "Commercial", "Mixed"),
exclude = NULL
),
zone_type = fct_explicit_na(zone_type, na_level = "Unknown"),
camera_count = as.integer(camera_count)
)
```
```{r zone-all}
df_post_w_race %>%
filter(zone_type != "Unknown") %>%
group_by(zone_type) %>%
summarize(
total = n(),
total_identified = sum(camera_count, na.rm=T),
perc_detected = sum(total_identified) / total
) %>%
mutate(se = sqrt(perc_detected * (1 - perc_detected) / total)) %>%
ungroup() %>%
mutate(
zone_type = fct_relevel(
zone_type,
c("Mixed", "Commercial", "Industrial", "Public", "Residential", "Unknown")
),
zone_type = fct_rev(zone_type)
) %>%
ggplot(aes(x = zone_type, y = perc_detected)) +
geom_point() +
geom_pointrange(aes(
ymin = perc_detected - 1.96 * se,
ymax = perc_detected + 1.96 * se
)) +
scale_x_discrete(name = "") +
scale_y_continuous(
name = "Identification rate",
position = "right",
labels = scales::percent_format(accuracy = 0.01),
expand = expansion(mult = c(0, 0.1)),
limits = c(0, NA)
) +
coord_flip() +
theme(
panel.grid = element_blank(),
panel.border = element_blank(),
axis.text = element_text(family = "Helvetica", color = "black"),
axis.title.x = element_text(family = "Helvetica", color = "black"),
axis.line = element_line(size = 0.5, color = "black"),
axis.ticks = element_line(size = 0.5, color = "black")
)
```
## Figure 12: Race identification rate
```{r race-all}
df_post_w_race %>%
ggplot(aes(x = percentage_minority, y = camera_count)) +
geom_smooth(
method = "lm",
formula = y ~ poly(x, degree = 2),
se = TRUE
) +
scale_x_continuous(
name = "Minority share of population (census block group)",
expand = expansion(mult = c(0, 0.05)),
labels = scales::percent_format(accuracy = 1)
) +
scale_y_continuous(
name = "Identification rate",
limits = c(0, NA),
oob = scales::squish,
expand = expansion(mult = c(0, 0.1)),
labels = scales::percent_format(accuracy = 0.1)
) +
theme(
panel.grid = element_blank(),
panel.border = element_blank(),
axis.text = element_text(family = "Helvetica", color = "black"),
axis.title = element_text(family = "Helvetica", color = "black"),
axis.line = element_line(size = 0.5, color = "black"),
axis.ticks.x = element_line(size = 0.5, color = "black"),
axis.ticks.y = element_line(size = 0.5, color = "black")
)
```
## Table 4: Regression output
```{r regression-model}
# reference level:
# - city: None (interceptless)
# - zone_type: residential
model_lm_poly <- df_post_w_race %>%
filter(zone_type != "Unknown") %>%
mutate(
detected = if_else(camera_count > 0, 1, 0),
zone_type = fct_relevel(
zone_type,
c("Residential", "Public", "Commercial", "Industrial", "Mixed", "Unknown")
)
) %>%
lm(detected ~ city-1 + zone_type + percentage_minority + I(percentage_minority^2), data = .)
tidy(model_lm_poly) %>%
filter(!str_detect(term, "^city")) %>%
transmute(
term,
estimate = formatC(estimate, format = "f"),
std.error = formatC(std.error, format = "f")
)
```