# File: surveilling-surveillance/01-dataset-tools.py (98 lines, 2.8 KiB, Python)

"""
author: ruben van de ven
"""
import ast
import os
import fire
from pathlib import Path
from detection.data import get_dataset
import pandas as pd
import PIL
from PIL import Image, ImageDraw
import logging
import coloredlogs
from detection.data.base import BaseDataset
from detection.data.info import DatasetInfo
# Set up coloredlogs for log output and create this module's logger.
coloredlogs.install()
logger = logging.getLogger(__name__)
# Inlined version of get_dataset("train"): that helper is not called here
# because of its hard-coded paths, so the same inputs are loaded from ./data.
split = "train"
meta = pd.read_csv("./data/meta.csv")
# Build each row's image path from its panoid and heading columns.
meta['image_path'] = [f'data/image/{panoid}_{heading}.jpg' for panoid, heading in zip(meta['panoid'], meta['heading'])]
info = DatasetInfo.load("./data/info.yaml")
# duplicates.txt is parsed as space-separated rows; the two columns are
# named 'hash' and 'path' explicitly.
duplicates = pd.read_csv(
"./data/duplicates.txt",
sep=' ',
names=['hash', 'path'])
# duplicates['panoid'] = duplicates['path'].str[11:-4]
# Keep only the metadata rows whose image_path is NOT listed in duplicates.
meta = meta[~meta['image_path'].isin(duplicates['path'])]
# NOTE(review): presumably selects the "train" split of the dataset;
# BaseDataset is defined in detection.data.base - confirm there.
dataset = BaseDataset(info, meta)[split]
def save_non_empty():
    """Write the module-level ``meta`` frame to ``./data/non-empty-meta.csv``.

    ``meta`` is saved as it stands at module level (after the rows listed in
    the duplicates file were removed). The destination path is printed once
    the file has been written.
    """
    target = "./data/non-empty-meta.csv"
    meta.to_csv(target)
    print("Saved to", target)
def render_images(dirname="/tmp/surveilling-surveillance/annotations"):
    """Render annotation boxes and per-annotation crops for annotated images.

    For every row of the module-level ``meta`` frame whose ``annotations``
    column is not the string ``"[]"``, the image at ``image_path`` is opened,
    each annotation's ``bbox`` is outlined in red, and the region inside the
    bbox is saved as ``crop-{panoid}_{heading}_{index}.jpg``. The outlined
    image itself is saved as ``{panoid}_{heading}.jpg``. Summary counts are
    printed at the end.

    Images that PIL cannot identify are logged and skipped. Annotations whose
    crop has a zero-sized dimension are logged and skipped; an image counts
    as "with annotations" when at least one of its crops was saved.

    Args:
        dirname: Directory the rendered images are written to; it is created
            (including parents) when it does not exist yet.
    """
    save_dir = Path(dirname)
    save_dir.mkdir(parents=True, exist_ok=True)

    # generally used with dataset.detection_dataloader(...)
    print(meta)

    # The annotations column holds stringified lists, so "no annotations" is
    # the literal text "[]".
    non_empty = meta[meta['annotations'] != "[]"]

    stat_images = 0
    stat_annotated_images = 0
    stat_annotations = 0

    for _, entry in non_empty.iterrows():
        # annotation keys: category_id, bbox, bbox_mode, segmentation
        annotations = ast.literal_eval(entry.annotations)
        image_path = entry.image_path

        try:
            image = Image.open(image_path)
        except PIL.UnidentifiedImageError:
            logger.error(f"Invalid image: {image_path}")
            continue

        # Use the image as a context manager so its file handle is released
        # after every row instead of piling up over the whole dataset.
        with image:
            stat_images += 1
            saved_crops = 0
            # One drawing context per image is enough for all its boxes.
            draw = ImageDraw.Draw(image)

            for y, ann in enumerate(annotations):
                draw.rectangle(ann['bbox'], outline='red', width=2)
                # NOTE(review): the crop is taken after the outline has been
                # drawn, so crops include the red box (and any overlapping
                # earlier boxes) - confirm this is intended.
                crop = image.crop(ann['bbox'])
                if 0 in crop.size:
                    logger.warning(f"Invalid crop {crop.size} using bbox {ann['bbox']} in {image_path}")
                    continue

                stat_annotations += 1
                saved_crops += 1
                crop.save(save_dir / f'crop-{entry.panoid}_{entry.heading}_{y}.jpg')

            # Count the image once if ANY annotation produced a crop; keying
            # on index 0 would miss images whose first bbox is degenerate.
            if saved_crops:
                stat_annotated_images += 1

            image.save(save_dir / f'{entry.panoid}_{entry.heading}.jpg')

    print(f"Total {stat_images} images and {stat_annotations} annotations.")
    print(f"{stat_annotated_images} images with annotations, {stat_images - stat_annotated_images} images without annotations")
if __name__ == "__main__":
    # Hand the command line over to Python Fire; it is called without an
    # explicit component.
    fire.Fire()