commit cbfd966555
Author: Hao
Date: 2021-05-20 13:20:48 -07:00
79 changed files with 11,596 additions and 0 deletions

BIN
.github/image/detections.png vendored Normal file

Binary file not shown (new image, 356 KiB).

141
.gitignore vendored Normal file

@@ -0,0 +1,141 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# IPython Notebook
*.ipynb
*.csv
*.ckpt
output/
cache/
figures/
notebook/
# Images
*.jpg

21
LICENSE Normal file

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2020 Hao Sheng
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

86
README.md Normal file

@@ -0,0 +1,86 @@
# Surveilling Surveillance: Estimating the Prevalence of Surveillance Cameras with Street View Data
### [Project page](https://stanford-policylab.github.io/surveillance/) | [Paper](https://arxiv.org/abs/2105.01764)
![detections](.github/image/detections.png)
__Locations of verified cameras in 10 large U.S. cities for the period 2016–2020. Densely clustered areas of points indicate regions with high camera density in each city. Camera density varies widely between neighborhoods. Note: scale varies between cities.__
This is the code base for the [Surveilling Surveillance](https://arxiv.org/abs/2105.01764) paper:
```
@article{sheng2021surveilling,
title={Surveilling Surveillance: Estimating the Prevalence of Surveillance Cameras with Street View Data},
author={Sheng, Hao and Yao, Keniel and Goel, Sharad},
journal={arXiv e-prints},
pages={arXiv--2105},
year={2021}
}
```
## Camera Detection
### Requirements
- Linux or macOS with Python ≥ 3.6
- [PyTorch](https://pytorch.org/) ≥ 1.6 and a [torchvision](https://github.com/pytorch/vision/) version that matches the PyTorch installation. Install them together at [pytorch.org](https://pytorch.org/) to ensure compatibility
- [Detectron2](https://github.com/facebookresearch/detectron2). Installation instructions for Detectron2 can be found [here](https://detectron2.readthedocs.io/en/latest/tutorials/install.html)
Install Python dependencies by running:
```shell
pip install -r requirements.txt
```
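As a quick post-install sanity check, this minimal sketch only prints whatever versions your environment provides:

```python
import torch
import torchvision
import detectron2

print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("torchvision:", torchvision.__version__)
print("detectron2:", detectron2.__version__)
```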
### Download street-view images
```shell
python main.py download_streetview_image --key GOOGLE_API_KEY --sec GOOGLE_API_SECRET
```
### Model training
```shell
cd detection && python main.py train --exp_name EXPERIMENT_NAME --[hyperparameter] [value]
```
### Model inference
```shell
cd detection && python main.py test --deploy --deploy_meta_path [DEPLOY_META_PATH]
```
where `DEPLOY_META_PATH` is the path to a CSV file in the following format (a sketch for generating one follows the table):
| save_path | panoid | heading | downloaded |
| --------- | ------ | ------- | ---------- |
| /dY/5I/l8/4NW89-ChFSP71GiA/344.png | dY5Il84NW89-ChFSP71GiA | -105.55188877562128 | True |
| ... | ... | ... | ... |
Here, `panoid` and `heading` refer to the ID and heading of each street-view image.
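If you need to assemble this file yourself, here is a minimal sketch using pandas; the values are illustrative, copied from the example row above, and only the four columns shown are assumed:

```python
import pandas as pd

# Illustrative row only; replace with your own panorama IDs and headings.
deploy_meta = pd.DataFrame([
    {
        "save_path": "/dY/5I/l8/4NW89-ChFSP71GiA/344.png",
        "panoid": "dY5Il84NW89-ChFSP71GiA",
        "heading": -105.55188877562128,
        "downloaded": True,
    },
])
deploy_meta.to_csv("deploy_meta.csv", index=False)
```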
## Analysis
To reproduce the figures and tables in our paper, run the `analysis/results.Rmd` script.
You'll need to download our camera and road network data [available here](https://storage.googleapis.com/scpl-surveillance/camera-data.zip) into a `data` directory in the root of this repository.
## Artifacts
### Annotations
Our collected camera annotations can be downloaded as follows:
| #images | # cameras | link | md5 |
| ------- | :---------: | ---- | --- |
| 3,155 | 1,696 | [download](https://storage.googleapis.com/scpl-surveillance/meta.csv) | `b2340143c6af2d1e6bfefd5001fd94c1` |
- *2021-5-20: This dataset is larger than the one reported in the paper as we include verified examples from our pilot.*
- *2021-5-18: The metadata can also be found in this repo as `./data/meta.csv`*.
### Pre-trained Models
Our pre-trained camera detection model can be downloaded as follows:
| architecture | Size | link | md5 |
| ------------ | ----- | ---- | --- |
| FasterRCNN | 472 MB | [download](https://storage.googleapis.com/scpl-surveillance/model.zip) | `dba44ad36340d3291102e72b340568a0` |
- *2021-5-20: We updated the model architecture (FasterRCNN).*
### Detection and Road Network Data
| Size | link | md5 |
| ----- | ---- | --- |
| 97 MB | [download](https://storage.googleapis.com/scpl-surveillance/camera-data.zip) | `6ceab577c53ba8dbe60b0ff1c8d5069a` |
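To verify any of the downloads above against its listed MD5, a small standard-library sketch:

```python
import hashlib

def md5sum(path, chunk_size=1 << 20):
    """Stream a local file and return its MD5 hex digest."""
    h = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# e.g. after downloading the detection and road network archive:
assert md5sum("camera-data.zip") == "6ceab577c53ba8dbe60b0ff1c8d5069a"
```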

252
analysis/figures.R Normal file

@@ -0,0 +1,252 @@
estimate_detection_metrics <- function(df, recall = 0.63) {
df %>%
left_join(city_data) %>%
transmute(
city,
type,
period,
road_network_length_km,
m_per_pano,
pop_pano = 2 * road_network_length_km * 1000 / m_per_pano, # N
n_pano,
n_detection,
# detection rate (unadjusted detections per pano)
p_hat = n_detection / n_pano,
# infinite population sd:
p_hat_sd = sqrt(p_hat * (1 - p_hat) / n_pano),
# for finite population sd:
# p_hat_sd = sqrt((p_hat * (1 - p_hat) / n_pano) * ((pop_pano - n_pano) / (pop_pano - 1))),
# detection rate (detections per km, counting both sides of the road)
est_detections_per_km = p_hat * (1000 / m_per_pano) * (2 / recall),
se_detections_per_km = p_hat_sd * (1000 / m_per_pano) * (2 / recall),
# detection count
est_detections = est_detections_per_km * road_network_length_km,
se_detections = se_detections_per_km * road_network_length_km
) %>%
ungroup() %>%
select(-p_hat, -p_hat_sd)
}
plot_camera_density <- function(df, legend = TRUE) {
if (legend) {
legend_position = "bottom"
} else {
legend_position = "none"
}
df %>%
ggplot(aes(x = city, y = est_detections_per_km, fill = type)) +
geom_col() +
geom_linerange(aes(
ymin = est_detections_per_km - 1.96*se_detections_per_km,
ymax = est_detections_per_km + 1.96*se_detections_per_km
)) +
scale_x_discrete(name = "") +
scale_y_continuous(
name = "Estimated cameras per km",
position = "right",
expand = expansion(mult = c(0, 0.1))
) +
scale_fill_discrete(name = "") +
coord_flip() +
theme(
panel.border = element_blank(),
axis.line = element_line(size = 1, color = "black"),
axis.title.x = element_text(family = "Helvetica", color = "black"),
axis.text = element_text(family = "Helvetica", color = "black"),
legend.position = legend_position,
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank()
)
}
load_road_network <- function(city_name){
stopifnot(city_name %in% city_data$city)
path <- here::here("data", "road_network", city_name, "edges.shp")
read_sf(path)
}
get_max_points <- function(df){
df %>%
select(geometry) %>%
st_cast("POINT") %>%
st_coordinates() %>%
as_tibble() %>%
summarize(
x_max = max(X),
x_min = min(X),
y_max = max(Y),
y_min = min(Y)
)
}
generate_sampled_point_map <- function(df, city_name){
# load road network
road_network <- load_road_network(city_name)
# get crs
road_network_crs <- st_crs(road_network) %>%
as.integer()
road_network_crs <- road_network_crs[1]
# find bounding coordinates of road network
bbox <- st_bbox(road_network)
# plot points
road_network %>%
ggplot() +
geom_sf(fill = "white", color = "gray", alpha = 0.6) +
geom_sf(
data = df %>%
filter(city == city_name) %>%
st_as_sf(coords = c("lon", "lat"),
# ensure same crs as road network
crs = road_network_crs,
agr = "constant"),
color = "blue", size = 0.2,
shape = 16, alpha = 1
) +
scale_x_continuous(expand = expansion(mult = c(0.02, 0.02))) +
scale_y_continuous(expand = expansion(mult = c(0, 0.02))) +
coord_sf(xlim = c(bbox$xmin, bbox$xmax), ylim = c(bbox$ymin, bbox$ymax)) +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
panel.border = element_blank(),
legend.position = "bottom",
legend.text = element_text(size = 20)
)
}
generate_detected_point_map <- function(df, city_name){
# load road network
road_network <- load_road_network(city_name)
# get crs
road_network_crs <- st_crs(road_network) %>%
as.integer()
road_network_crs <- road_network_crs[1]
# find bounding coordinates of road network
bbox <- st_bbox(road_network)
# plot points
road_network %>%
ggplot() +
geom_sf(fill = "white", color = "gray", alpha = 0.6) +
geom_sf(
data = df %>%
filter(
city == city_name,
camera_count > 0
) %>%
st_as_sf(coords = c("lon", "lat"),
# ensure same crs as road network
crs = road_network_crs,
agr = "constant"),
color = "red", size = 0.5,
shape = 16, alpha = 1
) +
scale_x_continuous(expand = expansion(mult = c(0.02, 0.02))) +
scale_y_continuous(expand = expansion(mult = c(0, 0.02))) +
coord_sf(xlim = c(bbox$xmin, bbox$xmax), ylim = c(bbox$ymin, bbox$ymax)) +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
panel.border = element_blank(),
legend.position = "bottom",
legend.text = element_text(size = 20)
)
}
annotate_points_with_census <- function(df, city_name, census_var){
stopifnot(census_var %in% c("income", "race"))
# define state, county using `city_data`
state <- city_data %>%
filter(city == city_name) %>%
pull(state)
county <- city_data %>%
filter(city == city_name) %>%
pull(county)
# specify variables
summary_vars <- "B03002_001" # total population
if (census_var == "income") {
vars <- c(Income = "B19113_001")
} else if (census_var == "race") {
vars <- c(White = "B03002_003") #non-Hispanic white
}
# get census data
if (city_name == "New York") {
state = "NY"
counties <- c("New York County", "Kings County", "Queens County",
"Bronx County", "Richmond County")
new_york <- purrr::map(
counties,
~ get_acs(
state = state,
county = .x,
geography = "block group",
variables = vars,
summary_var = summary_vars,
geometry = TRUE
)
)
df_census_block_group <- bind_rows(new_york)
} else{
if (city_name == "Washington") {
county <- NULL
}
df_census_block_group <- get_acs(
state = state,
county = county,
geography = "block group",
variables = vars,
summary_var = summary_vars,
geometry = TRUE
)
}
# add GIS features
df <- df %>%
filter(city == city_name) %>%
# ensure same coords as tidycensus
st_as_sf(
coords = c("lon", "lat"),
crs = 4269,
agr = "constant"
)
# annotate points with census data
if (census_var == "income") {
df <- st_join(
df,
df_census_block_group %>%
select(GEOID, NAME, median_household_income = estimate, geometry)
)
} else if (census_var == "race") {
df <- st_join(
df,
df_census_block_group %>%
transmute(
GEOID, NAME,
percentage_minority = (summary_est - estimate) / summary_est, geometry
)
)
}
df
}

280
analysis/results.Rmd Normal file

@@ -0,0 +1,280 @@
---
title: "results"
author: "Keniel Yao"
date: "4/26/2021"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r load-functions}
library(tidyverse)
library(sf)
library(glue)
library(tidycensus)
library(broom)
source(here::here('analysis', 'figures.R'))
theme_set(theme_bw(base_size = 14))
```
# Load data
```{r data}
df_pre <- read_csv(here::here("data", "cameras_2011-2015.csv")) %>%
mutate(period = "2011-2015")
df_post <- read_csv(here::here("data", "cameras_2015-2021.csv")) %>%
mutate(period = "2015-2021")
city_data <- read_csv(here::here("data", "city_metadata.csv"))
recall <- 0.63
```
# Figures
## Table 1: City metadata
```{r metadata}
city_data %>%
arrange(desc(type), desc(road_network_length_km)) %>%
transmute(
City = case_when(
city == "New York" ~ "New York City",
city == "Washington" ~ "Washington, D.C.",
TRUE ~ city
),
Population = formatC(round(population_census2010, -3), format = "d", big.mark=","),
`Area (sq. km)` = formatC(area_sqkm_census2010, format = "d", big.mark=","),
`Road length (km)` = formatC(road_network_length_km, format = "d", big.mark=",")
)
```
## Figure 5: Spatial distribution of sampled points
```{r sampled-points}
generate_sampled_point_map(df_post, "San Francisco")
generate_sampled_point_map(df_post, "Chicago")
generate_sampled_point_map(df_post, "New York")
```
## Table 3: Detection count, density and total camera estimates
```{r main-table}
bind_rows(
df_pre,
df_post
) %>%
group_by(city, period) %>%
summarize(
n_pano = n(),
n_detection = sum(camera_count)
) %>%
ungroup() %>%
estimate_detection_metrics(recall = recall) %>%
transmute(
rank = if_else(period == "2015-2021", est_detections_per_km, 0),
city = fct_reorder(city, - rank),
type,
period = if_else(period == "2015-2021", "2016-2020", period),
road_network_length_km = formatC(road_network_length_km, format = "d", big.mark=","),
m_per_pano = round(m_per_pano, 0),
n_detection,
est_detections_per_km = round(est_detections_per_km, 2),
se_detections_per_km = glue("({ round(se_detections_per_km, 2) })"),
est_detections = formatC(round(est_detections, -2), format = "d", big.mark=","),
se_detections = glue('({ formatC(round(se_detections, -2), format = "d", big.mark=",") })')
) %>%
pivot_wider(
id_cols = c(city, type, road_network_length_km, m_per_pano),
names_from = period,
values_from = c(n_detection, est_detections_per_km, se_detections_per_km, est_detections, se_detections)
) %>%
arrange(desc(type), city) %>%
mutate(
across(ends_with("2011-2015"), ~ str_replace_na(.x, "-")),
city = as.character(city)
) %>%
select(
city, road_network_length_km, m_per_pano,
`n_detection_2011-2015`, `n_detection_2016-2020`,
`est_detections_per_km_2011-2015`, `se_detections_per_km_2011-2015`,
`est_detections_per_km_2016-2020`, `se_detections_per_km_2016-2020`,
`est_detections_2011-2015`, `se_detections_2011-2015`,
`est_detections_2016-2020`, `se_detections_2016-2020`
)
```
## Figure 9: Maps of detected points
```{r detected-points}
generate_detected_point_map(df_post, "San Francisco")
generate_detected_point_map(df_post, "Chicago")
generate_detected_point_map(df_post, "New York")
```
## Figure 10: Pre-post estimated camera density
```{r density-plot}
df_post %>%
group_by(city, period) %>%
summarize(
n_pano = n(),
n_detection = sum(camera_count)
) %>%
ungroup() %>%
estimate_detection_metrics(recall = recall) %>%
mutate(
city = case_when(
city == "New York" ~ "New York City",
city == "Washington" ~ "Washington, D.C.",
T ~ city
),
type = factor(type, c("Global", "US")),
city = fct_reorder(city, est_detections_per_km)
) %>%
plot_camera_density(legend = FALSE)
```
## Figure 11: Zone identification rate
```{r annotate-race-data}
us_cities <- city_data %>%
filter(type == "US") %>%
pull(city)
df_post_w_race <- us_cities %>%
map_dfr(~ annotate_points_with_census(df_post, .x, "race")) %>%
st_drop_geometry() %>%
mutate(
city = case_when(
city == "New York" ~ "New York City",
city == "Washington" ~ "Washington D.C.",
TRUE ~ city
),
city = factor(
city,
c("New York City", "San Francisco", "Boston", "Chicago", "Philadelphia",
"Washington D.C.", "Los Angeles", "Baltimore", "Seattle", "Milwaukee")
),
zone_type = str_to_title(zone_type),
zone_type = factor(
zone_type,
c("Public", "Residential", "Industrial", "Commercial", "Mixed"),
exclude = NULL
),
zone_type = fct_explicit_na(zone_type, na_level = "Unknown"),
camera_count = as.integer(camera_count)
)
```
```{r zone-all}
df_post_w_race %>%
filter(zone_type != "Unknown") %>%
group_by(zone_type) %>%
summarize(
total = n(),
total_identified = sum(camera_count, na.rm=T),
perc_detected = sum(total_identified) / total
) %>%
mutate(se = sqrt(perc_detected * (1 - perc_detected) / total)) %>%
ungroup() %>%
mutate(
zone_type = fct_relevel(
zone_type,
c("Mixed", "Commercial", "Industrial", "Public", "Residential", "Unknown")
),
zone_type = fct_rev(zone_type)
) %>%
ggplot(aes(x = zone_type, y = perc_detected)) +
geom_point() +
geom_pointrange(aes(
ymin = perc_detected - 1.96 * se,
ymax = perc_detected + 1.96 * se
)) +
scale_x_discrete(name = "") +
scale_y_continuous(
name = "Identification rate",
position = "right",
labels = scales::percent_format(accuracy = 0.01),
expand = expansion(mult = c(0, 0.1)),
limits = c(0, NA)
) +
coord_flip() +
theme(
panel.grid = element_blank(),
panel.border = element_blank(),
axis.text = element_text(family = "Helvetica", color = "black"),
axis.title.x = element_text(family = "Helvetica", color = "black"),
axis.line = element_line(size = 0.5, color = "black"),
axis.ticks = element_line(size = 0.5, color = "black")
)
```
## Figure 12: Race identification rate
```{r race-all}
df_post_w_race %>%
ggplot(aes(x = percentage_minority, y = camera_count)) +
geom_smooth(
method = "lm",
formula = y ~ poly(x, degree = 2),
se = TRUE
) +
scale_x_continuous(
name = "Minority share of population (census block group)",
expand = expansion(mult = c(0, 0.05)),
labels = scales::percent_format(accuracy = 1)
) +
scale_y_continuous(
name = "Identification rate",
limits = c(0, NA),
oob = scales::squish,
expand = expansion(mult = c(0, 0.1)),
labels = scales::percent_format(accuracy = 0.1)
) +
theme(
panel.grid = element_blank(),
panel.border = element_blank(),
axis.text = element_text(family = "Helvetica", color = "black"),
axis.title = element_text(family = "Helvetica", color = "black"),
axis.line = element_line(size = 0.5, color = "black"),
axis.ticks.x = element_line(size = 0.5, color = "black"),
axis.ticks.y = element_line(size = 0.5, color = "black")
)
```
## Table 4: Regression output
```{r regression-model}
# reference level:
# - city: None (interceptless)
# - zone_type: residential
model_lm_poly <- df_post_w_race %>%
filter(zone_type != "Unknown") %>%
mutate(
detected = if_else(camera_count > 0, 1, 0),
zone_type = fct_relevel(
zone_type,
c("Residential", "Public", "Commercial", "Industrial", "Mixed", "Unknown")
)
) %>%
lm(detected ~ city-1 + zone_type + percentage_minority + I(percentage_minority^2), data = .)
tidy(model_lm_poly) %>%
filter(!str_detect(term, "^city")) %>%
transmute(
term,
estimate = formatC(estimate, format = "f"),
std.error = formatC(std.error, format = "f")
)
```

0
data/image/.keepme Normal file

22
data/info.yaml Normal file

@@ -0,0 +1,22 @@
author: Hao
class_names:
- Directed Camera
- Dome Camera
date: 2021-04-06
description: Camera detection dataset
name: camera-detection
sources:
- channels: null
date: null
height: 640
name: gsv
resolution: ''
width: 640
task:
- object detection
version:
description: Camera detection dataset
major: 1
minor: 0
patch: 0
version_str: 1.0.0

3156
data/meta.csv Normal file

File diff suppressed because it is too large.


@@ -0,0 +1,13 @@
import pandas as pd
import os
from .version import Version
from .base import BaseDataset
from .info import DatasetInfo
from . import constants as C
def get_dataset(split="train"):
meta = pd.read_csv("../data/meta.csv")
info = DatasetInfo.load("../data/info.yaml")
return BaseDataset(info, meta)[split]
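A hypothetical usage sketch for `get_dataset`; it assumes the snippet above is the `data` package's `__init__.py`, that it is run from inside `detection/` so the relative `../data/meta.csv` and `../data/info.yaml` paths resolve, and that the meta file provides a `split` column and valid local image paths:

```python
from data import get_dataset  # import path is an assumption

train_set = get_dataset("train")   # BaseDataset restricted to the "train" split
print(len(train_set))              # number of annotated panoramas in the split
sample = train_set[0]              # dict; *_image_path columns are loaded as numpy arrays
print(sorted(sample.keys()))
```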

77
detection/data/base.py Normal file

@@ -0,0 +1,77 @@
import numpy as np
import os
import pickle
from PIL import Image
from tqdm import tqdm
from torch.utils.data import Dataset
from .info import DatasetInfoMixin
from .detection import DetectionMixin
from .util import _is_path
class BaseDataset(Dataset,
DatasetInfoMixin,
DetectionMixin):
def __init__(self,
info,
meta,
split=None,
):
DatasetInfoMixin.__init__(self,
info=info,
meta=meta,
split=split)
@staticmethod
def _load_image_file(file_path):
if not _is_path(file_path):
return None
image_pil = Image.open(file_path).convert('RGB')
image_np = np.array(image_pil)
return image_np
@staticmethod
def _load_pickle_file(file_path):
with open(file_path, 'rb') as f:
data = pickle.load(f)
return data
@staticmethod
def _load_numpy_file(file_path):
data = np.load(file_path)
return data
@classmethod
def _load_single_image(cls, sample_dict):
new_sample_dict = {}
for k, v in sample_dict.items():
if k.endswith("image_path"):
new_sample_dict[k.replace(
"_image_path", "_image")] = cls._load_image_file(v)
else:
new_sample_dict[k] = v
return new_sample_dict
def __getitem__(self, index):
if isinstance(index, str):
return self.get_split(index)
elif isinstance(index, slice):
return self.slice(index)
sample = self._meta.iloc[index].to_dict()
# Replace Nan
# TODO
# Load Images
sample = self._load_single_image(sample)
# Apply Format
if isinstance(self._format, list):
sample = {k: v for k, v in sample.items() if k in self._format}
elif isinstance(self._format, dict):
sample = {self._format[k]: v for k,
v in sample.items() if k in self._format}
return sample


@@ -0,0 +1 @@
ANNOTATION_COLUMN = "annotations"


@@ -0,0 +1,79 @@
import torch
from torch.utils.data import Dataset
from .info import DatasetInfoMixin
from . import constants as C
def trivial_batch_collator(batch):
return batch
class DetectionMixin:
def detection_dataloader(self,
augmentations=None,
is_train=True,
use_instance_mask=False,
image_path_col=None,
**kwargs):
from detectron2.data import DatasetMapper
if augmentations is None:
augmentations = []
mapper = DatasetMapper(is_train=is_train,
image_format="RGB",
use_instance_mask=use_instance_mask,
instance_mask_format="bitmask",
augmentations=augmentations
)
return DetectionDataset(info=self.info,
meta=self.meta,
split=self.split,
image_path_col=image_path_col,
mapper=mapper) \
.dataloader(**kwargs)
class DetectionDataset(Dataset, DatasetInfoMixin):
"""
Dataset class that provides standard Detectron2 model input format:
https://detectron2.readthedocs.io/en/latest/tutorials/models.html?highlight=input%20format#model-input-format
Notice the annotation column in the meta file need to follow Detectron2's
standard dataset dict format:
https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html#standard-dataset-dicts
"""
def __init__(self, info, meta, mapper, split=None, image_path_col=None):
if C.ANNOTATION_COLUMN not in meta.columns:
raise ValueError(f"[{C.ANNOTATION_COLUMN}] column not found in the meta data.")
if image_path_col is None:
image_path_cols = [
c for c in meta.columns if c.endswith("image_path")]
if len(image_path_cols) == 0:
raise ValueError(
"No image path column found in the meta data. Please check meta data and use `image_path_col` argument to specify the column.")
elif len(image_path_cols) > 1:
raise ValueError(
"Multiple image path columns found in the meta data. Please use `image_path_col` argument to specify the column.")
else:
image_path_col = image_path_cols[0]
meta = meta.rename(columns={image_path_col: "file_name"})
self.mapper = mapper
DatasetInfoMixin.__init__(self,
info=info,
meta=meta,
split=split)
def __getitem__(self, index):
sample = self._meta.iloc[index].to_dict()
sample[C.ANNOTATION_COLUMN] = eval(sample[C.ANNOTATION_COLUMN])
return self.mapper(sample)
def dataloader(self, **kwargs):
return torch.utils.data.DataLoader(
self,
collate_fn=trivial_batch_collator,
**kwargs)
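For reference, a hedged sketch of a single meta row consumed by `DetectionDataset`: the `annotations` column is a string that `__getitem__` evaluates, and it must follow Detectron2's standard dataset-dict format. The column names other than `annotations` and the `*_image_path` suffix, as well as the class indexing, are assumptions:

```python
# Hypothetical row of the meta file (shown as a dict).
row = {
    "gsv_image_path": "data/image/dY5Il84NW89-ChFSP71GiA_344.png",  # renamed to "file_name" internally
    "split": "train",
    # Stored as a string in the CSV and eval()'d in __getitem__.
    # bbox_mode 0 corresponds to detectron2.structures.BoxMode.XYXY_ABS;
    # category_id 1 assumes the 0-indexed class order in data/info.yaml ("Dome Camera").
    "annotations": "[{'bbox': [120.0, 80.0, 180.0, 140.0], 'bbox_mode': 0, 'category_id': 1}]",
}
```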

178
detection/data/info.py Normal file

@@ -0,0 +1,178 @@
import yaml
import dataclasses
import pandas as pd
from copy import deepcopy
from dataclasses import asdict, dataclass, field
from typing import List, Optional, Union
from .version import Version
class BaseInfo:
@classmethod
def from_dict(cls, dataset_info_dict: dict) -> "DatasetInfo":
field_names = set(f.name for f in dataclasses.fields(cls))
return cls(
**{k: v for k, v in dataset_info_dict.items() if k in field_names})
@dataclass
class ImageSourceInfo(BaseInfo):
# Required Fields
name: str = field(default_factory=str)
height: int = field(default_factory=int)
width: int = field(default_factory=int)
date: str = field(default_factory=str)
# Optional Fields
channels: Optional[list] = None
resolution: Optional[str] = field(default_factory=str)
@dataclass
class DatasetInfo(BaseInfo):
name: str = field(default_factory=str)
description: str = field(default_factory=str)
author: str = field(default_factory=str)
version: Union[str, Version] = field(default_factory=Version)
date: str = field(default_factory=str)
task: List[str] = field(default_factory=list)
class_names: List[str] = field(default_factory=list)
sources: List[ImageSourceInfo] = field(default_factory=list)
def __post_init__(self):
if self.version is not None and not isinstance(self.version, Version):
if isinstance(self.version, str):
self.version = Version(self.version)
else:
self.version = Version.from_dict(self.version)
if self.sources is not None and not all(
[isinstance(s, ImageSourceInfo) for s in self.sources]):
sources = []
for source in self.sources:
if isinstance(source, ImageSourceInfo):
pass
elif isinstance(source, dict):
source = ImageSourceInfo.from_dict(source)
else:
raise ValueError(
f"Unknown type for ImageSourceInfo: {type(source)}")
sources.append(source)
self.sources = sources
@classmethod
def load(cls, path):
with open(path, "r") as f:
yaml_dict = yaml.load(f, Loader=yaml.SafeLoader)
return cls.from_dict(yaml_dict)
def save(self, path):
with open(path, "w") as f:
yaml.dump(asdict(self), f)
def dump(self, fileobj):
yaml.dump(asdict(self), fileobj)
class DatasetInfoMixin:
def __init__(self,
info: DatasetInfo,
meta: pd.DataFrame,
split: Optional[str] = None):
self._info = info
self._meta = meta
self._split = split
self._format = None
if self._split is not None and self._split != 'all':
self._meta.query(f"split == '{self._split}'", inplace=True)
def __len__(self):
return len(self._meta)
def __repr__(self):
features = self.features
if len(features) < 5:
features_repr = "[" + ", ".join(features) + "]"
else:
features_repr = "[" + \
", ".join(features[:3] + ["...", features[-1]]) + "]"
return f"{type(self).__name__}(split: {self.split}, version: {self.version}, features[{len(features)}]: {features_repr}, samples: {self.__len__()})"
def get_split(self, split):
if split == "all":
return self
elif split in self.splits:
result = self.query(f"split == '{split}'")
result._split = split
return result
else:
raise ValueError(
f"Unknown split {split}. Split has to be one of {list(self.splits.keys())}")
def slice(self, expr):
result = deepcopy(self)
result._meta = result._meta.iloc[expr]
return result
def query(self, expr):
result = deepcopy(self)
result._meta = result._meta.query(expr)
return result
def filter(self, func):
result = deepcopy(self)
result._meta = result._meta[result._meta.apply(func, 1)].reset_index()
return result
def set_format(self, columns: Union[dict, list]):
self._format = columns
def reset_format(self):
self.set_format(None)
def value_counts(self, value):
return self._meta[value].value_counts().to_dict()
@property
def info(self):
return self._info
@property
def meta(self):
return self._meta.copy()
@property
def name(self):
return self._info.name
@property
def version(self):
return self._info.version
@property
def description(self):
return self._info.description
@property
def author(self):
return self._info.author
@property
def sources(self):
return [s.name for s in self._info.sources]
@property
def split(self):
if self._split is None:
return "all"
return self._split
@property
def splits(self):
return self.value_counts("split")
@property
def features(self):
features = list(self._meta.columns)
return features
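A minimal usage sketch for `DatasetInfo`, assuming the repository root is on `sys.path` so `detection.data` is importable, and using the `data/info.yaml` shown earlier in this commit:

```python
from detection.data.info import DatasetInfo  # import path is an assumption

info = DatasetInfo.load("data/info.yaml")
print(info.name)             # "camera-detection"
print(info.version)          # 1.0.0, built from the nested version dict
print(info.class_names)      # ["Directed Camera", "Dome Camera"]
print(info.sources[0].name)  # "gsv"
```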

6
detection/data/util.py Normal file

@@ -0,0 +1,6 @@
from pathlib import Path, PosixPath
def _is_path(file_path):
return isinstance(file_path, (str, PosixPath))

99
detection/data/version.py Normal file

@@ -0,0 +1,99 @@
""" Adapted from
https://github.com/huggingface/datasets/blob/master/src/datasets/utils/version.py
"""
import dataclasses
import re
from dataclasses import dataclass
_VERSION_TMPL = r"^(?P<major>{v})" r"\.(?P<minor>{v})" r"\.(?P<patch>{v})$"
_VERSION_WILDCARD_REG = re.compile(_VERSION_TMPL.format(v=r"\d+|\*"))
_VERSION_RESOLVED_REG = re.compile(_VERSION_TMPL.format(v=r"\d+"))
@dataclass()
class Version:
"""Dataset version MAJOR.MINOR.PATCH.
Args:
version_str: string. Eg: "1.2.3".
description: string, a description of what is new in this version.
"""
version_str: str
description: str = None
major: str = None
minor: str = None
patch: str = None
def __post_init__(self):
self.major, self.minor, self.patch = _str_to_version(self.version_str)
def __repr__(self):
return "{}.{}.{}".format(*self.tuple)
@property
def tuple(self):
return self.major, self.minor, self.patch
def _validate_operand(self, other):
if isinstance(other, str):
return Version(other)
elif isinstance(other, Version):
return other
raise AssertionError("{} (type {}) cannot be compared to version.".format(other, type(other)))
def __hash__(self):
return hash(self.tuple)
def __eq__(self, other):
other = self._validate_operand(other)
return self.tuple == other.tuple
def __ne__(self, other):
other = self._validate_operand(other)
return self.tuple != other.tuple
def __lt__(self, other):
other = self._validate_operand(other)
return self.tuple < other.tuple
def __le__(self, other):
other = self._validate_operand(other)
return self.tuple <= other.tuple
def __gt__(self, other):
other = self._validate_operand(other)
return self.tuple > other.tuple
def __ge__(self, other):
other = self._validate_operand(other)
return self.tuple >= other.tuple
def match(self, other_version):
"""Returns True if other_version matches.
Args:
other_version: string, of the form "x[.y[.x]]" where {x,y,z} can be a
number or a wildcard.
"""
major, minor, patch = _str_to_version(other_version, allow_wildcard=True)
return major in [self.major, "*"] and minor in [self.minor, "*"] and patch in [self.patch, "*"]
@classmethod
def from_dict(cls, dic):
field_names = set(f.name for f in dataclasses.fields(cls))
return cls(**{k: v for k, v in dic.items() if k in field_names})
def _str_to_version(version_str, allow_wildcard=False):
"""Return the tuple (major, minor, patch) version extracted from the str."""
reg = _VERSION_WILDCARD_REG if allow_wildcard else _VERSION_RESOLVED_REG
res = reg.match(version_str)
if not res:
msg = "Invalid version '{}'. Format should be x.y.z".format(version_str)
if allow_wildcard:
msg += " with {x,y,z} being digits or wildcard."
else:
msg += " with {x,y,z} being digits."
raise ValueError(msg)
return tuple(v if v == "*" else int(v) for v in [res.group("major"), res.group("minor"), res.group("patch")])
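A small sketch of how the `Version` helper above behaves; the import path assumes the repository root is on `sys.path`:

```python
from detection.data.version import Version  # import path is an assumption

v = Version("1.0.0")
print(v)                 # 1.0.0
print(v < "1.2.0")       # True: comparisons accept strings or Version instances
print(v.match("1.*.*"))  # True: wildcard match on minor and patch
print(v.match("2.0.0"))  # False
```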


@@ -0,0 +1,2 @@
from .loss import get_loss_fn
from .evaluator import *


@@ -0,0 +1,4 @@
# Adapted from https://github.com/rafaelpadilla/Object-Detection-Metrics
from .evaluator import Evaluator
from .bbox import BoundingBox, BoundingBoxes
from .utils import BBType, BBFormat, CoordinatesType


@@ -0,0 +1,232 @@
from .utils import *
class BoundingBox:
def __init__(self,
imageName,
classId,
x,
y,
w,
h,
typeCoordinates=CoordinatesType.Absolute,
imgSize=None,
bbType=BBType.GroundTruth,
classConfidence=None,
format=BBFormat.XYWH):
"""Constructor.
Args:
imageName: String representing the image name.
classId: String value representing class id.
x: Float value representing the X upper-left coordinate of the bounding box.
y: Float value representing the Y upper-left coordinate of the bounding box.
w: Float value representing the width bounding box.
h: Float value representing the height bounding box.
typeCoordinates: (optional) Enum (Relative or Absolute) represents if the bounding box
coordinates (x,y,w,h) are absolute or relative to size of the image. Default:'Absolute'.
imgSize: (optional) 2D vector (width, height)=>(int, int) represents the size of the
image of the bounding box. If typeCoordinates is 'Relative', imgSize is required.
bbType: (optional) Enum (Groundtruth or Detection) identifies if the bounding box
represents a ground truth or a detection. If it is a detection, the classConfidence has
to be informed.
classConfidence: (optional) Float value representing the confidence of the detected
class. If detectionType is Detection, classConfidence needs to be informed.
format: (optional) Enum (BBFormat.XYWH or BBFormat.XYX2Y2) indicating the format of the
coordinates of the bounding boxes. BBFormat.XYWH: <left> <top> <width> <height>
BBFormat.XYX2Y2: <left> <top> <right> <bottom>.
"""
self._imageName = imageName
self._typeCoordinates = typeCoordinates
if typeCoordinates == CoordinatesType.Relative and imgSize is None:
raise IOError(
'Parameter \'imgSize\' is required. It is necessary to inform the image size.')
if bbType == BBType.Detected and classConfidence is None:
raise IOError(
'For bbType=\'Detection\', it is necessary to inform the classConfidence value.')
# if classConfidence != None and (classConfidence < 0 or classConfidence > 1):
# raise IOError('classConfidence value must be a real value between 0 and 1. Value: %f' %
# classConfidence)
self._classConfidence = classConfidence
self._bbType = bbType
self._classId = classId
self._format = format
# If relative coordinates, convert to absolute values
# For relative coords: (x,y,w,h)=(X_center/img_width ,
# Y_center/img_height)
if (typeCoordinates == CoordinatesType.Relative):
(self._x, self._y, self._w, self._h) = convertToAbsoluteValues(
imgSize, (x, y, w, h))
self._width_img = imgSize[0]
self._height_img = imgSize[1]
if format == BBFormat.XYWH:
self._x2 = self._w
self._y2 = self._h
self._w = self._x2 - self._x
self._h = self._y2 - self._y
else:
raise IOError(
'For relative coordinates, the format must be XYWH (x,y,width,height)')
# For absolute coords: (x,y,w,h)=real bb coords
else:
self._x = x
self._y = y
if format == BBFormat.XYWH:
self._w = w
self._h = h
self._x2 = self._x + self._w
self._y2 = self._y + self._h
else: # format == BBFormat.XYX2Y2: <left> <top> <right> <bottom>.
self._x2 = w
self._y2 = h
self._w = self._x2 - self._x
self._h = self._y2 - self._y
if imgSize is None:
self._width_img = None
self._height_img = None
else:
self._width_img = imgSize[0]
self._height_img = imgSize[1]
def getAbsoluteBoundingBox(self, format=BBFormat.XYWH):
if format == BBFormat.XYWH:
return (self._x, self._y, self._w, self._h)
elif format == BBFormat.XYX2Y2:
return (self._x, self._y, self._x2, self._y2)
def getRelativeBoundingBox(self, imgSize=None):
if imgSize is None and self._width_img is None and self._height_img is None:
raise IOError(
'Parameter \'imgSize\' is required. It is necessary to inform the image size.')
if imgSize is not None:
return convertToRelativeValues(
(imgSize[0], imgSize[1]), (self._x, self._x2, self._y, self._y2))
else:
return convertToRelativeValues(
(self._width_img, self._height_img), (self._x, self._x2, self._y, self._y2))
def getImageName(self):
return self._imageName
def getConfidence(self):
return self._classConfidence
def getFormat(self):
return self._format
def getClassId(self):
return self._classId
def getImageSize(self):
return (self._width_img, self._height_img)
def getCoordinatesType(self):
return self._typeCoordinates
def getBBType(self):
return self._bbType
@staticmethod
def compare(det1, det2):
det1BB = det1.getAbsoluteBoundingBox()
det1ImgSize = det1.getImageSize()
det2BB = det2.getAbsoluteBoundingBox()
det2ImgSize = det2.getImageSize()
if det1.getClassId() == det2.getClassId() and \
det1.getConfidence() == det2.getConfidence() and \
det1BB[0] == det2BB[0] and \
det1BB[1] == det2BB[1] and \
det1BB[2] == det2BB[2] and \
det1BB[3] == det2BB[3] and \
det1ImgSize[0] == det2ImgSize[0] and \
det1ImgSize[1] == det2ImgSize[1]:
return True
return False
@staticmethod
def clone(boundingBox):
absBB = boundingBox.getAbsoluteBoundingBox(format=BBFormat.XYWH)
# return (self._x,self._y,self._x2,self._y2)
newBoundingBox = BoundingBox(
boundingBox.getImageName(),
boundingBox.getClassId(),
absBB[0],
absBB[1],
absBB[2],
absBB[3],
typeCoordinates=boundingBox.getCoordinatesType(),
imgSize=boundingBox.getImageSize(),
bbType=boundingBox.getBBType(),
classConfidence=boundingBox.getConfidence(),
format=BBFormat.XYWH)
return newBoundingBox
class BoundingBoxes:
def __init__(self):
self._boundingBoxes = []
def addBoundingBox(self, bb):
self._boundingBoxes.append(bb)
def removeBoundingBox(self, _boundingBox):
for i, d in enumerate(self._boundingBoxes):
if BoundingBox.compare(d, _boundingBox):
del self._boundingBoxes[i]
return
def removeAllBoundingBoxes(self):
self._boundingBoxes = []
def getBoundingBoxes(self):
return self._boundingBoxes
def getBoundingBoxByClass(self, classId):
boundingBoxes = []
for d in self._boundingBoxes:
if d.getClassId() == classId: # get only specified bounding box type
boundingBoxes.append(d)
return boundingBoxes
def getClasses(self):
classes = []
for d in self._boundingBoxes:
c = d.getClassId()
if c not in classes:
classes.append(c)
return classes
def getBoundingBoxesByType(self, bbType):
# get only specified bb type
return [d for d in self._boundingBoxes if d.getBBType() == bbType]
def getBoundingBoxesByImageName(self, imageName):
# get only specified bb type
return [d for d in self._boundingBoxes if d.getImageName() == imageName]
def count(self, bbType=None):
if bbType is None: # Return all bounding boxes
return len(self._boundingBoxes)
count = 0
for d in self._boundingBoxes:
if d.getBBType() == bbType: # get only specified bb type
count += 1
return count
def clone(self):
newBoundingBoxes = BoundingBoxes()
for d in self._boundingBoxes:
det = BoundingBox.clone(d)
newBoundingBoxes.addBoundingBox(det)
return newBoundingBoxes
def drawAllBoundingBoxes(self, image, imageName):
bbxes = self.getBoundingBoxesByImageName(imageName)
for bb in bbxes:
if bb.getBBType() == BBType.GroundTruth: # if ground truth