Create filtered dataset
This commit is contained in:
parent
6bfb80c88e
commit
cf0a761a73
2 changed files with 113 additions and 5 deletions
97
01-dataset-tools.py
Normal file
97
01-dataset-tools.py
Normal file
|
@ -0,0 +1,97 @@
|
|||
"""
|
||||
author: ruben van de ven
|
||||
"""
|
||||
|
||||
import ast
|
||||
import os
|
||||
import fire
|
||||
from pathlib import Path
|
||||
from detection.data import get_dataset
|
||||
import pandas as pd
|
||||
import PIL
|
||||
from PIL import Image, ImageDraw
|
||||
import logging
|
||||
import coloredlogs
|
||||
|
||||
from detection.data.base import BaseDataset
|
||||
from detection.data.info import DatasetInfo
|
||||
|
||||
# ---------------------------------------------------------------------------
# Module-level setup: coloured logging + loading/filtering the metadata table.
coloredlogs.install()
logger = logging.getLogger(__name__)

# inline get_dataset("train") because of hard coded paths


split = "train"
meta = pd.read_csv("./data/meta.csv")
# Derive the on-disk image path for each row from its panorama id and heading.
meta['image_path'] = [f'data/image/{panoid}_{heading}.jpg' for panoid, heading in zip(meta['panoid'], meta['heading'])]

info = DatasetInfo.load("./data/info.yaml")
# duplicates.txt is produced by the md5sum/uniq pipeline described in the
# README: two whitespace-separated columns, the file hash and its path.
duplicates = pd.read_csv(
    "./data/duplicates.txt",
    sep=' ',
    names=['hash', 'path'])
# duplicates['panoid'] = duplicates['path'].str[11:-4]

# Drop rows whose image file is a known duplicate — presumably the repeated
# "image unavailable" placeholder download (see README); TODO confirm.
meta = meta[~meta['image_path'].isin(duplicates['path'])]

# NOTE(review): indexing BaseDataset with the split name appears to select the
# train subset — semantics defined in detection.data.base, not visible here.
dataset = BaseDataset(info, meta)[split]
|
||||
|
||||
|
||||
def save_non_empty(path="./data/non-empty-meta.csv"):
    """Write the duplicate-filtered metadata table to *path* as CSV.

    The table is the module-level ``meta`` DataFrame, which has already had
    rows with duplicate image files removed at import time.

    Args:
        path: Destination CSV file. Defaults to the original hard-coded
            location, so existing CLI invocations are unaffected.
    """
    meta.to_csv(path)
    print("Saved to", path)
|
||||
|
||||
|
||||
def render_images(dirname="/tmp/surveilling-surveillance/annotations"):
    """Render annotated street-view images and per-annotation crops.

    For every metadata row with a non-empty ``annotations`` list, draws each
    bounding box in red on the full image, saves a crop per valid bbox as
    ``crop-{panoid}_{heading}_{i}.jpg``, and saves the annotated full image
    as ``{panoid}_{heading}.jpg`` — all under *dirname*.

    Args:
        dirname: Output directory; created (with parents) if missing.
    """
    save_dir = Path(dirname)
    save_dir.mkdir(parents=True, exist_ok=True)

    # generally used with dataset.detection_dataloader(...)
    print(meta)
    # filter non empty on a stringified list
    non_empty = meta[meta['annotations'] != "[]"]

    stat_images = 0
    stat_annotated_images = 0
    stat_annotations = 0

    for i, entry in non_empty.iterrows():
        # Each annotation dict holds: category_id, bbox, bbox_mode, segmentation.
        # The column is a stringified Python list, hence literal_eval.
        annotations = ast.literal_eval(entry.annotations)
        image_path = entry.image_path  # f'./data/image/{entry.panoid}_{entry.heading}.jpg'

        try:
            image = Image.open(image_path)
        except (FileNotFoundError, PIL.UnidentifiedImageError):
            # Missing files are as unusable as corrupt ones; skip both.
            logger.error(f"Invalid image: {image_path}")
            continue

        stat_images += 1

        # One draw context per image suffices; no need to recreate per bbox.
        draw = ImageDraw.Draw(image)
        has_valid_annotation = False

        for y, ann in enumerate(annotations):
            draw.rectangle(ann['bbox'], outline='red', width=2)
            # Crop is taken after drawing, so crops include the red outline
            # (matches the original behaviour).
            crop = image.crop(ann['bbox'])

            if 0 in crop.size:
                logger.warning(f"Invalid crop {crop.size} using bbox {ann['bbox']} in {image_path}")
                continue

            stat_annotations += 1
            # BUG FIX: previously the image was counted as annotated only when
            # annotation index 0 produced a valid crop — an image whose first
            # bbox was degenerate but later bboxes valid was never counted.
            if not has_valid_annotation:
                has_valid_annotation = True
                stat_annotated_images += 1

            crop.save(save_dir / f'crop-{entry.panoid}_{entry.heading}_{y}.jpg')

        image.save(save_dir / f'{entry.panoid}_{entry.heading}.jpg')

    print(f"Total {stat_images} images and {stat_annotations} annotations.")
    print(f"{stat_annotated_images} images with annotations, {stat_images - stat_annotated_images} images without annotations")
|
||||
|
||||
|
||||
# Expose this module's functions as subcommands, e.g.
# `python 01-dataset-tools.py save_non_empty` or `... render_images --dirname DIR`.
if __name__ == "__main__":
    fire.Fire()
|
21
README.md
21
README.md
|
@ -21,26 +21,37 @@ This is the code base of our [Surveilling Surveillance](https://arxiv.org/abs/21
|
|||
- [PyTorch](https://pytorch.org/) ≥ 1.6 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. Install them together at [pytorch.org](https://pytorch.org/) to make sure of this
|
||||
- [Detection2](https://github.com/facebookresearch/detectron2). The installation instruction of Detection2 can be found [here](https://detectron2.readthedocs.io/en/latest/tutorials/install.html)
|
||||
|
||||
Install Python dependencies by running:
|
||||
Install Python dependencies by running (being a bit illegal with the detectron2 dependency due to it not supporting poetry):
|
||||
```shell
|
||||
pip install -r requirements.txt
|
||||
poetry install
|
||||
git clone https://github.com/facebookresearch/detectron2.git
|
||||
poetry run python -m pip install -e detectron2
|
||||
```
|
||||
|
||||
|
||||
|
||||
### Download street-view images
|
||||
```shell
|
||||
python main.py download_streetview_image --key GOOGLE_API_KEY --sec GOOGLE_API_SECRET
|
||||
```
|
||||
|
||||
By now, many Street View images from the original dataset have become unavailable. We can filter these out by scanning for duplicates, since all unavailable images download as the same error image.
|
||||
|
||||
```bash
|
||||
find data/ ! -empty -type f -exec md5sum {} + | sort | uniq -w32 -dD > data/duplicates.txt
|
||||
poetry run python 01-dataset-tools.py save_non_empty
|
||||
```
|
||||
|
||||
### Model training
|
||||
```shell
|
||||
cd detection && python main.py train --exp_name EXPERIMENT_NAME --[hyparameter] [value]
|
||||
poetry run python detection/main.py train --exp_name EXPERIMENT_NAME --[hyperparameter] [value]
|
||||
```
|
||||
|
||||
### Model inference
|
||||
```shell
|
||||
cd detection && python main.py test --deploy --deploy_meta_path [DEPLOY_META_PATH]
|
||||
poetry run python detection/main.py test CHECKPOINT
|
||||
```
|
||||
, where `DEPLOY_META_PATH` is a path to a csv file of the following format:
|
||||
**[For now `--deploy-meta-path` is broken]**, where `DEPLOY_META_PATH` is a path to a csv file of the following format:
|
||||
|
||||
| save_path | panoid | heading | downloaded |
|
||||
| --------- | ------ | ------- | ---------- |
|
||||
|
|
Loading…
Reference in a new issue