Create filtered dataset
This commit is contained in:
parent
6bfb80c88e
commit
cf0a761a73
2 changed files with 113 additions and 5 deletions
97
01-dataset-tools.py
Normal file
97
01-dataset-tools.py
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
"""
|
||||||
|
author: ruben van de ven
|
||||||
|
"""
|
||||||
|
|
||||||
|
import ast
|
||||||
|
import os
|
||||||
|
import fire
|
||||||
|
from pathlib import Path
|
||||||
|
from detection.data import get_dataset
|
||||||
|
import pandas as pd
|
||||||
|
import PIL
|
||||||
|
from PIL import Image, ImageDraw
|
||||||
|
import logging
|
||||||
|
import coloredlogs
|
||||||
|
|
||||||
|
from detection.data.base import BaseDataset
|
||||||
|
from detection.data.info import DatasetInfo
|
||||||
|
|
||||||
|
coloredlogs.install()
logger = logging.getLogger(__name__)

# Inlined equivalent of get_dataset("train"), because that helper relies on
# hard-coded paths.
split = "train"

meta = pd.read_csv("./data/meta.csv")
# Relative location of every image, derived from its panoid and heading.
meta['image_path'] = [
    f'data/image/{panoid}_{heading}.jpg'
    for panoid, heading in zip(meta['panoid'], meta['heading'])
]

info = DatasetInfo.load("./data/info.yaml")

# duplicates.txt holds `md5sum` output, which separates the hash and the path
# with TWO spaces. Splitting on a single space produces an empty middle field,
# which makes pandas silently use the hash as the index and leaves the 'hash'
# column empty; splitting on any run of whitespace parses both columns as named.
duplicates = pd.read_csv(
    "./data/duplicates.txt",
    sep=r'\s+',
    names=['hash', 'path'])
# Unused alternative: derive the panoid from the path instead.
# duplicates['panoid'] = duplicates['path'].str[11:-4]

# Drop every row whose image file is listed as a duplicate.
meta = meta[~meta['image_path'].isin(duplicates['path'])]

dataset = BaseDataset(info, meta)[split]
|
||||||
|
|
||||||
|
|
||||||
|
def save_non_empty():
    """Write the module-level ``meta`` table to ``./data/non-empty-meta.csv``.

    ``meta`` has already had the rows listed in ``./data/duplicates.txt``
    removed when the module was loaded. The destination path is echoed to
    stdout once the file has been written.
    """
    target = "./data/non-empty-meta.csv"
    meta.to_csv(target)
    print("Saved to", target)
|
||||||
|
|
||||||
|
|
||||||
|
def render_images(dirname="/tmp/surveilling-surveillance/annotations"):
    """Render annotated images with their boxes outlined, plus one crop per box.

    For every row of the module-level ``meta`` whose ``annotations`` column is
    not the literal text ``"[]"``, the image is opened, each annotation's
    ``bbox`` is outlined in red, and the bbox region is saved as
    ``crop-{panoid}_{heading}_{index}.jpg``. The outlined image itself is saved
    as ``{panoid}_{heading}.jpg``. Summary counts are printed at the end.

    Args:
        dirname: Output directory; created (including parents) if missing.
    """
    save_dir = Path(dirname)
    save_dir.mkdir(parents=True, exist_ok=True)

    # generally used with dataset.detection_dataloader(...)
    print(meta)
    # `annotations` is a stringified list, so an empty one is the text "[]".
    non_empty = meta[meta['annotations'] != "[]"]

    stat_images = 0
    stat_annotated_images = 0
    stat_annotations = 0

    for _, entry in non_empty.iterrows():
        # Each annotation carries: category_id, bbox, bbox_mode, segmentation.
        annotations = ast.literal_eval(entry.annotations)
        image_path = entry.image_path

        try:
            image = Image.open(image_path)
        except PIL.UnidentifiedImageError:
            logger.error(f"Invalid image: {image_path}")
            continue

        stat_images += 1
        # Tracked per image so that a degenerate FIRST bbox does not cause an
        # image with later valid boxes to be counted as unannotated.
        has_valid_annotation = False

        # The context manager releases the underlying file once we are done.
        with image:
            draw = ImageDraw.Draw(image)
            for y, ann in enumerate(annotations):
                draw.rectangle(ann['bbox'], outline='red', width=2)
                # NOTE(review): the crop is taken after the outline is drawn,
                # so it includes the red frame - confirm this is intended.
                crop = image.crop(ann['bbox'])

                if 0 in crop.size:
                    logger.warning(f"Invalid crop {crop.size} using bbox {ann['bbox']} in {image_path}")
                    continue

                stat_annotations += 1
                has_valid_annotation = True

                crop.save(save_dir / f'crop-{entry.panoid}_{entry.heading}_{y}.jpg')

            image.save(save_dir / f'{entry.panoid}_{entry.heading}.jpg')

        if has_valid_annotation:
            stat_annotated_images += 1

    print(f"Total {stat_images} images and {stat_annotations} annotations.")
    print(f"{stat_annotated_images} images with annotations, {stat_images - stat_annotated_images} images without annotations")
|
||||||
|
|
||||||
|
|
||||||
|
# Hand the module over to python-fire so its functions can be invoked as
# sub-commands, e.g. `python 01-dataset-tools.py save_non_empty`.
if __name__ == "__main__":
    fire.Fire()
|
21
README.md
21
README.md
|
@ -21,26 +21,37 @@ This is the code base of our [Surveilling Surveillance](https://arxiv.org/abs/21
|
||||||
- [PyTorch](https://pytorch.org/) ≥ 1.6 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. Install them together at [pytorch.org](https://pytorch.org/) to make sure of this
|
- [PyTorch](https://pytorch.org/) ≥ 1.6 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. Install them together at [pytorch.org](https://pytorch.org/) to make sure of this
|
||||||
- [Detection2](https://github.com/facebookresearch/detectron2). The installation instruction of Detection2 can be found [here](https://detectron2.readthedocs.io/en/latest/tutorials/install.html)
|
- [Detection2](https://github.com/facebookresearch/detectron2). The installation instruction of Detection2 can be found [here](https://detectron2.readthedocs.io/en/latest/tutorials/install.html)
|
||||||
|
|
||||||
Install Python dependencies by running:
|
Install Python dependencies by running the following (detectron2 does not support poetry, so it is installed separately with pip inside the poetry environment):
|
||||||
```shell
|
```shell
|
||||||
pip install -r requirements.txt
|
poetry install
|
||||||
|
git clone https://github.com/facebookresearch/detectron2.git
|
||||||
|
poetry run python -m pip install -e detectron2
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### Download street-view images
|
### Download street-view images
|
||||||
```shell
|
```shell
|
||||||
python main.py download_streetview_image --key GOOGLE_API_KEY --sec GOOGLE_API_SECRET
|
python main.py download_streetview_image --key GOOGLE_API_KEY --sec GOOGLE_API_SECRET
|
||||||
```
|
```
|
||||||
|
|
||||||
|
By now, lots of Streetview images from the original dataset have become unavailable. We can filter these out by scanning for duplicates (unavailable images are now all downloaded as the same error image):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
find data/ ! -empty -type f -exec md5sum {} + | sort | uniq -w32 -dD > data/duplicates.txt
|
||||||
|
poetry run python 01-dataset-tools.py save_non_empty
|
||||||
|
```
|
||||||
|
|
||||||
### Model training
|
### Model training
|
||||||
```shell
|
```shell
|
||||||
cd detection && python main.py train --exp_name EXPERIMENT_NAME --[hyparameter] [value]
|
poetry run python detection/main.py train --exp_name EXPERIMENT_NAME --[hyparameter] [value]
|
||||||
```
|
```
|
||||||
|
|
||||||
### Model inference
|
### Model inference
|
||||||
```shell
|
```shell
|
||||||
cd detection && python main.py test --deploy --deploy_meta_path [DEPLOY_META_PATH]
|
poetry run python detection/main.py test CHECKPOINT
|
||||||
```
|
```
|
||||||
, where `DEPLOY_META_PATH` is a path to a csv file of the following format:
|
**[For now --deploy-meta-path is broken]** , where `DEPLOY_META_PATH` is a path to a csv file of the following format:
|
||||||
|
|
||||||
| save_path | panoid | heading | downloaded |
|
| save_path | panoid | heading | downloaded |
|
||||||
| --------- | ------ | ------- | ---------- |
|
| --------- | ------ | ------- | ---------- |
|
||||||
|
|
Loading…
Reference in a new issue