54 lines
1.7 KiB
Python
54 lines
1.7 KiB
Python
import os
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
from PIL import Image
|
|
|
|
# Initialise PIL explicitly at import time: this is required so that
# Image.EXTENSION is populated before is_image_ext() looks suffixes up in it.
Image.init()
|
|
|
|
def extract_labels(f: Path) -> dict:
    """Derive the labels of an image from its path relative to the dataset root.

    The first path component must start with the arrondissement number
    followed by exactly one suffix character (e.g. ``"3e ..."``); everything
    after the first space is ignored. The second path component supplies the
    street, of which only the first space-separated word is kept.

    Args:
        f: Image path relative to the dataset root, with at least two
            components.

    Returns:
        A dict with the keys ``"arrondisement"`` (int) and ``"street"`` (str).
        Note the key is spelled with a single "s"; it is part of the returned
        data and is deliberately left unchanged.

    Raises:
        ValueError: If the leading token of the first component, minus its
            last character, is not an integer.
        IndexError: If the path has fewer than two components.
    """
    # Work on path components rather than splitting str(f) on '/': the result
    # then no longer depends on the platform's path separator, and the
    # arrondissement token can never bleed into the next component.
    parts = f.parts
    return {
        "arrondisement": int(parts[0].split(' ')[0][:-1]),
        "street": parts[1].split(' ')[0],
    }
|
|
|
|
def is_image_ext(f: Path) -> bool:
    """Tell whether the suffix of ``f`` is present in ``Image.EXTENSION``.

    The suffix is lower-cased before the lookup, so the check does not
    depend on the case used in the file name.
    """
    suffix = f.suffix.lower()
    return suffix in Image.EXTENSION
|
|
|
|
def open_image_folder(source_dir, *, max_images: Optional[int] = None):
    """Collect the image files below ``source_dir`` together with their labels.

    The directory is searched recursively. An entry is kept when
    ``is_image_ext`` accepts it and it is a regular file. Entries are
    processed in sorted path order.

    Args:
        source_dir: Root directory of the image collection.
        max_images: Keyword-only upper bound on the number of images
            returned. ``None`` (the default) returns every image found;
            negative values are treated as zero.

    Returns:
        A list with one dict per image, holding:
          - ``"abspath"``: the resolved path of the file,
          - ``"relpath"``: the path relative to ``source_dir``,
          - ``"labels"``: the result of ``extract_labels`` on ``relpath``.
    """
    root = Path(source_dir)
    image_files = [
        f for f in sorted(root.rglob('*'))
        if is_image_ext(f) and os.path.isfile(f)
    ]

    if max_images is not None:
        # The limit used to be accepted but silently ignored. Clamp at zero so
        # a negative value cannot turn into a "drop from the end" slice.
        image_files = image_files[:max(max_images, 0)]

    labeled_images = []
    for f in image_files:
        # Computed once and reused for both the "relpath" and "labels" entries.
        relpath = f.relative_to(root)
        labeled_images.append({
            "abspath": f.resolve(),
            "relpath": relpath,
            "labels": extract_labels(relpath),
        })

    return labeled_images
|
|
|
|
def filter_by_label(image_files: list, label, value):
    """Return the entries of ``image_files`` whose ``label`` equals ``value``.

    Each entry is expected to carry a ``'labels'`` mapping; the input order
    is preserved in the returned list.
    """
    return [entry for entry in image_files if entry['labels'][label] == value]
|
|
|
|
def aggregate_labels(image_files) -> dict[str, list]:
    """Gather, for every label name, the sorted distinct values that occur.

    Each entry of ``image_files`` is expected to carry a ``'labels'`` mapping.
    Label names appear in the result in the order they are first seen.
    """
    seen = {}
    for entry in image_files:
        for name, value in entry['labels'].items():
            bucket = seen.setdefault(name, [])
            if value not in bucket:
                bucket.append(value)

    return {name: sorted(bucket) for name, bucket in seen.items()}
|
|
|
|
def print_stats(image_files, labels):
    """Print each label name followed by the image count for each of its values.

    ``labels`` maps a label name to the values to report; the count for a
    value is the number of entries that ``filter_by_label`` returns for it.
    """
    for name, values in labels.items():
        print(name)
        for value in values:
            count = len(filter_by_label(image_files, name, value))
            print(f" - {value}: {count}")
|
|
|
|
# Script body (runs at import time): index the "../VLoD" folder, collect the
# label values that occur in it, and print how many images carry each value.
# Stray "|" tokens left over in this section were removed so that the
# statements parse; their order and the module-level names are unchanged.
image_files = open_image_folder("../VLoD")
labels = aggregate_labels(image_files)
print_stats(image_files, labels)