"""Collect labelled images from a folder tree and print per-label counts."""

import os
from pathlib import Path
from typing import Optional

from PIL import Image

Image.init()  # required to initialise PIL.Image.EXTENSION


def extract_labels(f: Path) -> dict:
    """Derive the labels of an image from its path relative to the dataset root.

    The labels are read from the first two path components:

    * ``"arrondisement"``: the first space-delimited token of the first
      component, with its last character dropped, converted to ``int``.
    * ``"street"``: the first space-delimited token of the second component.

    NOTE(review): the exact naming scheme of the dataset folders is not
    visible here -- confirm that the first component really starts with a
    number followed by a single suffix character.

    Args:
        f: Image path relative to the dataset root.

    Returns:
        A dict with the keys ``"arrondisement"`` (int) and ``"street"`` (str).

    Raises:
        ValueError: If the arrondissement token is not an integer once its
            last character has been removed.
        IndexError: If the path has fewer than two components.
    """
    # Use the parsed components rather than splitting the string on '/', so
    # the lookup also works where the path separator is a backslash.
    parts = Path(f).parts
    top_level = parts[0] if parts else ""
    return {
        # The key spelling is kept as-is: it is part of the returned data.
        "arrondisement": int(top_level.split(' ')[0][:-1]),
        "street": parts[1].split(' ')[0],
    }


def is_image_ext(f: Path) -> bool:
    """Return True if the lower-cased suffix of *f* is registered with PIL."""
    return f.suffix.lower() in Image.EXTENSION


def open_image_folder(source_dir, *, max_images: Optional[int] = None) -> list[dict]:
    """List the labelled image files found recursively below *source_dir*.

    Args:
        source_dir: Root directory of the dataset.
        max_images: If given, keep at most this many images, taken from the
            start of the sorted file list. ``None`` keeps all of them.

    Returns:
        One dict per image, in sorted path order, with the keys ``"abspath"``
        (resolved path), ``"relpath"`` (path relative to *source_dir*) and
        ``"labels"`` (the result of :func:`extract_labels` on the relative
        path).

    Raises:
        ValueError: If *max_images* is negative.
    """
    if max_images is not None and max_images < 0:
        raise ValueError(f"max_images must be non-negative, got {max_images}")

    root = Path(source_dir)
    image_files = [
        f for f in sorted(root.rglob('*'))
        if is_image_ext(f) and os.path.isfile(f)
    ]
    if max_images is not None:
        image_files = image_files[:max_images]

    labeled_images = []
    for f in image_files:
        relpath = f.relative_to(root)  # computed once, used for both entries
        labeled_images.append({
            "abspath": f.resolve(),
            "relpath": relpath,
            "labels": extract_labels(relpath),
        })
    return labeled_images


def filter_by_label(image_files: list, label, value) -> list:
    """Return the entries of *image_files* whose ``labels[label]`` equals *value*."""
    return [image for image in image_files if image['labels'][label] == value]


def aggregate_labels(image_files) -> dict[str, list]:
    """Collect the sorted distinct values of every label in *image_files*.

    Args:
        image_files: Iterable of dicts that each carry a ``"labels"`` dict.

    Returns:
        A dict mapping each label name to the sorted list of its distinct
        values.
    """
    labels: dict[str, list] = {}
    for image in image_files:
        for label, value in image['labels'].items():
            seen = labels.setdefault(label, [])
            # List membership compares with ==, so values need not be hashable.
            if value not in seen:
                seen.append(value)
    return {label: sorted(values) for label, values in labels.items()}


def print_stats(image_files, labels):
    """Print, for each label, how many images carry each of its values."""
    for label, values in labels.items():
        print(label)
        for value in values:
            count = len(filter_by_label(image_files, label, value))
            print(f" - {value}: {count}")


# Script part: runs at import time and defines the module-level names
# ``image_files`` and ``labels``.
image_files = open_image_folder("../VLoD")
labels = aggregate_labels(image_files)
print_stats(image_files, labels)