diff --git a/ldm/data/laion.py b/ldm/data/laion.py index 7c07d38..e3e14a3 100644 --- a/ldm/data/laion.py +++ b/ldm/data/laion.py @@ -347,6 +347,7 @@ def example03(): dataset = (dataset .select(filter_keys) .decode('pil', handler=wds.warn_and_continue)) + n_save = 20 n_total = 0 n_large = 0 n_large_nowm = 0 @@ -356,6 +357,9 @@ def example03(): n_large += 1 if filter_watermark(example): n_large_nowm += 1 + if n_large_nowm < n_save+1: + image = example["jpg"] + image.save(os.path.join("tmp", f"{n_large_nowm-1:06}.png")) if i%500 == 0: print(i)