Tool for extracting labels from the VLoD image dataset
This commit is contained in:
parent
c4aeedec59
commit
743c7d548f
1 changed files with 54 additions and 0 deletions
54
build_dataset_labels.py
Normal file
54
build_dataset_labels.py
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
Image.init()  # load PIL's format plugins up front; required to initialise PIL.Image.EXTENSION, which is_image_ext() consults
|
||||||
|
|
||||||
|
def extract_labels(f: Path) -> dict:
    """Derive the labels of an image from its path relative to the dataset root.

    The first path component must start with the arrondissement number
    followed by exactly one suffix character (for example ``12e ...``).
    The second path component must start with the street name.

    Args:
        f: Image path relative to the dataset root, with at least two
            components.

    Returns:
        A dict with the keys ``"arrondisement"`` (int) and ``"street"``
        (str, the first space-separated word of the second component).

    Raises:
        ValueError: If the first word of the first component, minus its
            last character, is not an integer.
        IndexError: If the path has fewer than two components.
    """
    # Use the path components instead of splitting str(f) on '/', so the
    # result does not depend on the platform's path separator and the
    # arrondissement token can never spill over into the next component.
    parts = f.parts
    return {
        # First word of the top-level directory, minus its one-char suffix.
        "arrondisement": int(parts[0].split(' ')[0][:-1]),
        # First word of the second-level directory.
        "street": parts[1].split(' ')[0],
    }
|
||||||
|
|
||||||
|
def is_image_ext(f: Path) -> bool:
    """Tell whether the lower-cased suffix of ``f`` is present in ``Image.EXTENSION``."""
    suffix = f.suffix.lower()
    return suffix in Image.EXTENSION
|
||||||
|
|
||||||
|
def open_image_folder(source_dir, *, max_images: Optional[int] = None):
    """Collect every image file below ``source_dir`` together with its labels.

    Args:
        source_dir: Root directory of the dataset; it is searched recursively.
        max_images: If given, keep only the first ``max_images`` files in
            sorted path order. ``None`` (the default) keeps all of them.

    Returns:
        A list with one dict per image, holding ``"abspath"`` (the resolved
        path), ``"relpath"`` (the path relative to ``source_dir``) and
        ``"labels"`` (the result of ``extract_labels`` on the relative path).

    Raises:
        ValueError: If ``max_images`` is negative.
    """
    if max_images is not None and max_images < 0:
        raise ValueError(f"max_images must be non-negative, got {max_images}")

    image_files = [
        f for f in sorted(Path(source_dir).rglob('*'))
        if is_image_ext(f) and os.path.isfile(f)
    ]

    # Honour the limit; the keyword used to be accepted but had no effect.
    if max_images is not None:
        image_files = image_files[:max_images]

    labeled_images = []
    for f in image_files:
        # Compute the relative path once and reuse it for the labels.
        relpath = f.relative_to(source_dir)
        labeled_images.append({
            "abspath": f.resolve(),
            "relpath": relpath,
            "labels": extract_labels(relpath),
        })

    return labeled_images
|
||||||
|
|
||||||
|
def filter_by_label(image_files: list, label, value):
    """Return the entries of ``image_files`` whose ``label`` equals ``value``.

    Each entry is expected to be a dict with a ``'labels'`` mapping; the
    input order is preserved in the returned list.
    """
    return [entry for entry in image_files if entry['labels'][label] == value]
|
||||||
|
|
||||||
|
def aggregate_labels(image_files) -> dict[str, list]:
    """Gather the distinct values seen for every label across all images.

    Args:
        image_files: Iterable of dicts, each with a ``'labels'`` mapping.

    Returns:
        A dict mapping each label name to a sorted list of its unique
        values. Labels appear in the order they were first encountered.
    """
    seen_values = {}
    for entry in image_files:
        for name, val in entry['labels'].items():
            bucket = seen_values.setdefault(name, [])
            # Equality-based dedupe, so values need not be hashable.
            if val not in bucket:
                bucket.append(val)

    return {name: sorted(bucket) for name, bucket in seen_values.items()}
|
||||||
|
|
||||||
|
def print_stats(image_files, labels):
    """Print, for every label, how many images carry each of its values.

    Args:
        image_files: Iterable of dicts, each with a ``'labels'`` mapping
            from label name to a hashable value.
        labels: Mapping from label name to the values to report, for
            example the result of ``aggregate_labels``.

    Output, one block per label::

        <label>
         - <value>: <number of images with that value>

    A value that no image carries is reported with a count of 0. An image
    that lacks one of the labels is simply not counted for it.
    """
    # Tally every (label, value) pair in one pass instead of rescanning
    # the whole image list once per reported value.
    counts = {}
    for entry in image_files:
        for pair in entry['labels'].items():
            counts[pair] = counts.get(pair, 0) + 1

    for label, values in labels.items():
        print(label)
        for value in values:
            print(f" - {value}: {counts.get((label, value), 0)}")
|
||||||
|
|
||||||
|
# Script driver: scan the dataset directory, collect the label vocabulary,
# and print per-value image counts.
# NOTE(review): this runs at import time as well; there is no __main__ guard.
image_files = open_image_folder("../VLoD")
labels = aggregate_labels(image_files)
print_stats(image_files, labels)
|
Loading…
Reference in a new issue