280 lines
7.6 KiB
Text
280 lines
7.6 KiB
Text
---
|
|
title: "results"
|
|
author: "Keniel Yao"
|
|
date: "4/26/2021"
|
|
output: html_document
|
|
---
|
|
|
|
```{r setup, include=FALSE}
|
|
knitr::opts_chunk$set(echo = TRUE)
|
|
```
|
|
|
|
```{r load-functions}
|
|
library(tidyverse)
|
|
library(sf)
|
|
library(glue)
|
|
library(tidycensus)
|
|
library(broom)
|
|
|
|
source(here::here('analysis', 'figures.R'))
|
|
|
|
theme_set(theme_bw(base_size = 14))
|
|
```
|
|
|
|
|
|
# Load data
|
|
|
|
```{r data}
|
|
df_pre <- read_csv(here::here("data", "cameras_2011-2015.csv")) %>%
|
|
mutate(period = "2011-2015")
|
|
df_post <- read_csv(here::here("data", "cameras_2015-2021.csv")) %>%
|
|
mutate(period = "2015-2021")
|
|
city_data <- read_csv(here::here("data", "city_metadata.csv"))
|
|
|
|
recall <- 0.63
|
|
```
|
|
|
|
# Figures
|
|
|
|
## Table 1: City metadata
|
|
|
|
```{r metadata}
|
|
city_data %>%
|
|
arrange(desc(type), desc(road_network_length_km)) %>%
|
|
transmute(
|
|
City = case_when(
|
|
city == "New York" ~ "New York City",
|
|
city == "Washington" ~ "Washington, D.C.",
|
|
TRUE ~ city
|
|
),
|
|
Population = formatC(round(population_census2010, -3), format = "d", big.mark=","),
|
|
`Area (sq. km)` = formatC(area_sqkm_census2010, format = "d", big.mark=","),
|
|
`Road length (km)` = formatC(road_network_length_km, format = "d", big.mark=",")
|
|
)
|
|
```
|
|
|
|
## Figure 5: Spatial distribution of sampled points
|
|
|
|
```{r sampled-points}
|
|
generate_sampled_point_map(df_post, "San Francisco")
|
|
generate_sampled_point_map(df_post, "Chicago")
|
|
generate_sampled_point_map(df_post, "New York")
|
|
```
|
|
|
|
## Table 3: Detection count, density and total camera estimates
|
|
|
|
```{r main-table}
|
|
bind_rows(
|
|
df_pre,
|
|
df_post
|
|
) %>%
|
|
group_by(city, period) %>%
|
|
summarize(
|
|
n_pano = n(),
|
|
n_detection = sum(camera_count)
|
|
) %>%
|
|
ungroup() %>%
|
|
estimate_detection_metrics(recall = recall) %>%
|
|
transmute(
|
|
rank = if_else(period == "2015-2021", est_detections_per_km, 0),
|
|
city = fct_reorder(city, - rank),
|
|
type,
|
|
period = if_else(period == "2015-2021", "2016-2020", period),
|
|
road_network_length_km = formatC(road_network_length_km, format = "d", big.mark=","),
|
|
m_per_pano = round(m_per_pano, 0),
|
|
n_detection,
|
|
est_detections_per_km = round(est_detections_per_km, 2),
|
|
se_detections_per_km = glue("({ round(se_detections_per_km, 2) })"),
|
|
est_detections = formatC(round(est_detections, -2), format = "d", big.mark=","),
|
|
se_detections = glue('({ formatC(round(se_detections, -2), format = "d", big.mark=",") })')
|
|
) %>%
|
|
pivot_wider(
|
|
id_cols = c(city, type, road_network_length_km, m_per_pano),
|
|
names_from = period,
|
|
values_from = c(n_detection, est_detections_per_km, se_detections_per_km, est_detections, se_detections)
|
|
) %>%
|
|
arrange(desc(type), city) %>%
|
|
mutate(
|
|
across(ends_with("2011-2015"), ~ str_replace_na(.x, "-")),
|
|
city = as.character(city)
|
|
) %>%
|
|
select(
|
|
city, road_network_length_km, m_per_pano,
|
|
`n_detection_2011-2015`, `n_detection_2016-2020`,
|
|
`est_detections_per_km_2011-2015`, `se_detections_per_km_2011-2015`,
|
|
`est_detections_per_km_2016-2020`, `se_detections_per_km_2016-2020`,
|
|
`est_detections_2011-2015`, `se_detections_2011-2015`,
|
|
`est_detections_2016-2020`, `se_detections_2016-2020`
|
|
)
|
|
```
|
|
|
|
|
|
## Figure 9: Maps of detected points
|
|
|
|
```{r detected-points}
|
|
generate_detected_point_map(df_post, "San Francisco")
|
|
generate_detected_point_map(df_post, "Chicago")
|
|
generate_detected_point_map(df_post, "New York")
|
|
```
|
|
|
|
## Figure 10: Pre-post estimated camera density
|
|
|
|
```{r density-plot}
|
|
df_post %>%
|
|
group_by(city, period) %>%
|
|
summarize(
|
|
n_pano = n(),
|
|
n_detection = sum(camera_count)
|
|
) %>%
|
|
ungroup() %>%
|
|
estimate_detection_metrics(recall = recall) %>%
|
|
mutate(
|
|
city = case_when(
|
|
city == "New York" ~ "New York City",
|
|
city == "Washington" ~ "Washington, D.C.",
|
|
T ~ city
|
|
),
|
|
type = factor(type, c("Global", "US")),
|
|
city = fct_reorder(city, est_detections_per_km)
|
|
) %>%
|
|
plot_camera_density(legend = FALSE)
|
|
```
|
|
|
|
## Figure 11: Zone identification rate
|
|
|
|
```{r annotate-race-data}
|
|
us_cities <- city_data %>%
|
|
filter(type == "US") %>%
|
|
pull(city)
|
|
|
|
df_post_w_race <- us_cities %>%
|
|
map_dfr(~ annotate_points_with_census(df_post, .x, "race")) %>%
|
|
st_drop_geometry() %>%
|
|
mutate(
|
|
city = case_when(
|
|
city == "New York" ~ "New York City",
|
|
city == "Washington" ~ "Washington D.C.",
|
|
TRUE ~ city
|
|
),
|
|
city = factor(
|
|
city,
|
|
c("New York City", "San Francisco", "Boston", "Chicago", "Philadelphia",
|
|
"Washington D.C.", "Los Angeles", "Baltimore", "Seattle", "Milwaukee")
|
|
),
|
|
zone_type = str_to_title(zone_type),
|
|
zone_type = factor(
|
|
zone_type,
|
|
c("Public", "Residential", "Industrial", "Commercial", "Mixed"),
|
|
exclude = NULL
|
|
),
|
|
zone_type = fct_explicit_na(zone_type, na_level = "Unknown"),
|
|
camera_count = as.integer(camera_count)
|
|
)
|
|
```
|
|
|
|
|
|
```{r zone-all}
|
|
df_post_w_race %>%
|
|
filter(zone_type != "Unknown") %>%
|
|
group_by(zone_type) %>%
|
|
summarize(
|
|
total = n(),
|
|
total_identified = sum(camera_count, na.rm=T),
|
|
perc_detected = sum(total_identified) / total
|
|
) %>%
|
|
mutate(se = sqrt(perc_detected * (1 - perc_detected) / total)) %>%
|
|
ungroup() %>%
|
|
mutate(
|
|
zone_type = fct_relevel(
|
|
zone_type,
|
|
c("Mixed", "Commercial", "Industrial", "Public", "Residential", "Unknown")
|
|
),
|
|
zone_type = fct_rev(zone_type)
|
|
) %>%
|
|
ggplot(aes(x = zone_type, y = perc_detected)) +
|
|
geom_point() +
|
|
geom_pointrange(aes(
|
|
ymin = perc_detected - 1.96 * se,
|
|
ymax = perc_detected + 1.96 * se
|
|
)) +
|
|
scale_x_discrete(name = "") +
|
|
scale_y_continuous(
|
|
name = "Identification rate",
|
|
position = "right",
|
|
labels = scales::percent_format(accuracy = 0.01),
|
|
expand = expansion(mult = c(0, 0.1)),
|
|
limits = c(0, NA)
|
|
) +
|
|
coord_flip() +
|
|
theme(
|
|
panel.grid = element_blank(),
|
|
panel.border = element_blank(),
|
|
axis.text = element_text(family = "Helvetica", color = "black"),
|
|
axis.title.x = element_text(family = "Helvetica", color = "black"),
|
|
axis.line = element_line(size = 0.5, color = "black"),
|
|
axis.ticks = element_line(size = 0.5, color = "black")
|
|
)
|
|
|
|
```
|
|
|
|
## Figure 12: Race identification rate
|
|
|
|
```{r race-all}
|
|
df_post_w_race %>%
|
|
ggplot(aes(x = percentage_minority, y = camera_count)) +
|
|
geom_smooth(
|
|
method = "lm",
|
|
formula = y ~ poly(x, degree = 2),
|
|
se = TRUE
|
|
) +
|
|
scale_x_continuous(
|
|
name = "Minority share of population (census block group)",
|
|
expand = expansion(mult = c(0, 0.05)),
|
|
labels = scales::percent_format(accuracy = 1)
|
|
) +
|
|
scale_y_continuous(
|
|
name = "Identification rate",
|
|
limits = c(0, NA),
|
|
oob = scales::squish,
|
|
expand = expansion(mult = c(0, 0.1)),
|
|
labels = scales::percent_format(accuracy = 0.1)
|
|
) +
|
|
theme(
|
|
panel.grid = element_blank(),
|
|
panel.border = element_blank(),
|
|
axis.text = element_text(family = "Helvetica", color = "black"),
|
|
axis.title = element_text(family = "Helvetica", color = "black"),
|
|
axis.line = element_line(size = 0.5, color = "black"),
|
|
axis.ticks.x = element_line(size = 0.5, color = "black"),
|
|
axis.ticks.y = element_line(size = 0.5, color = "black")
|
|
)
|
|
```
|
|
|
|
## Table 4: Regression output
|
|
|
|
```{r regression-model}
|
|
# reference level:
|
|
# - city: None (interceptless)
|
|
# - zone_type: residential
|
|
model_lm_poly <- df_post_w_race %>%
|
|
filter(zone_type != "Unknown") %>%
|
|
mutate(
|
|
detected = if_else(camera_count > 0, 1, 0),
|
|
zone_type = fct_relevel(
|
|
zone_type,
|
|
c("Residential", "Public", "Commercial", "Industrial", "Mixed", "Unknown")
|
|
)
|
|
) %>%
|
|
lm(detected ~ city-1 + zone_type + percentage_minority + I(percentage_minority^2), data = .)
|
|
|
|
tidy(model_lm_poly) %>%
|
|
filter(!str_detect(term, "^city")) %>%
|
|
transmute(
|
|
term,
|
|
estimate = formatC(estimate, format = "f"),
|
|
std.error = formatC(std.error, format = "f")
|
|
)
|
|
```
|
|
|
|
|
|
|