drop watermarked images
This commit is contained in:
parent
eee8df53b5
commit
6dfe59e9b0
3 changed files with 26 additions and 8 deletions
|
@@ -105,6 +105,7 @@ def dict_collation_fn(samples, combine_tensors=True, combine_scalars=True):
|
||||||
class WebDataModuleFromConfig(pl.LightningDataModule):
|
class WebDataModuleFromConfig(pl.LightningDataModule):
|
||||||
def __init__(self, tar_base, batch_size, train=None, validation=None,
|
def __init__(self, tar_base, batch_size, train=None, validation=None,
|
||||||
test=None, num_workers=4, multinode=True, min_size=None,
|
test=None, num_workers=4, multinode=True, min_size=None,
|
||||||
|
max_pwatermark=1.0,
|
||||||
**kwargs):
|
**kwargs):
|
||||||
super().__init__(self)
|
super().__init__(self)
|
||||||
print(f'Setting tar base to {tar_base}')
|
print(f'Setting tar base to {tar_base}')
|
||||||
|
@@ -116,6 +117,7 @@ class WebDataModuleFromConfig(pl.LightningDataModule):
|
||||||
self.test = test
|
self.test = test
|
||||||
self.multinode = multinode
|
self.multinode = multinode
|
||||||
self.min_size = min_size # filter out very small images
|
self.min_size = min_size # filter out very small images
|
||||||
|
self.max_pwatermark = max_pwatermark # filter out watermarked images
|
||||||
|
|
||||||
def make_loader(self, dataset_config, train=True):
|
def make_loader(self, dataset_config, train=True):
|
||||||
if 'image_transforms' in dataset_config:
|
if 'image_transforms' in dataset_config:
|
||||||
|
@@ -184,7 +186,7 @@ class WebDataModuleFromConfig(pl.LightningDataModule):
|
||||||
if self.min_size is None:
|
if self.min_size is None:
|
||||||
return True
|
return True
|
||||||
try:
|
try:
|
||||||
return x['json']['original_width'] >= self.min_size and x['json']['original_height'] >= self.min_size
|
return x['json']['original_width'] >= self.min_size and x['json']['original_height'] >= self.min_size and x['json']['pwatermark'] <= self.max_pwatermark
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@@ -336,18 +338,30 @@ def example03():
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def filter_watermark(x):
|
||||||
|
try:
|
||||||
|
return x['json']['pwatermark'] < 0.5
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
dataset = (dataset
|
dataset = (dataset
|
||||||
.select(filter_keys)
|
.select(filter_keys)
|
||||||
.decode('pil', handler=wds.warn_and_continue))
|
.decode('pil', handler=wds.warn_and_continue))
|
||||||
n_total = 0
|
n_total = 0
|
||||||
n_large = 0
|
n_large = 0
|
||||||
|
n_large_nowm = 0
|
||||||
for i, example in enumerate(dataset):
|
for i, example in enumerate(dataset):
|
||||||
n_total += 1
|
n_total += 1
|
||||||
if filter_size(example):
|
if filter_size(example):
|
||||||
n_large += 1
|
n_large += 1
|
||||||
|
if filter_watermark(example):
|
||||||
|
n_large_nowm += 1
|
||||||
|
|
||||||
if i%1000 == 0:
|
if i%500 == 0:
|
||||||
|
print(i)
|
||||||
print(f"Large: {n_large}/{n_total} | {n_large/n_total*100:.2f}%")
|
print(f"Large: {n_large}/{n_total} | {n_large/n_total*100:.2f}%")
|
||||||
|
if n_large > 0:
|
||||||
|
print(f"No Watermark: {n_large_nowm}/{n_large} | {n_large_nowm/n_large*100:.2f}%")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@@ -382,5 +396,5 @@ def example04():
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
#example01()
|
#example01()
|
||||||
#example02()
|
#example02()
|
||||||
#example03()
|
example03()
|
||||||
example04()
|
#example04()
|
||||||
|
|
|
@@ -23,10 +23,14 @@ cd /fsx/stable-diffusion/stable-diffusion
|
||||||
CONFIG="/fsx/stable-diffusion/stable-diffusion/configs/stable-diffusion/v1_improvedaesthetics.yaml"
|
CONFIG="/fsx/stable-diffusion/stable-diffusion/configs/stable-diffusion/v1_improvedaesthetics.yaml"
|
||||||
|
|
||||||
# resume and set new seed to reshuffle data
|
# resume and set new seed to reshuffle data
|
||||||
EXTRA="--seed 718 model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/checkpoints2/v1pp/v1pp-flatline.ckpt"
|
#EXTRA="--seed 718 model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/checkpoints2/v1pp/v1pp-flatline.ckpt"
|
||||||
|
EXTRA="--seed 718 --resume_from_checkpoint /fsx/stable-diffusion/stable-diffusion/logs/2022-07-22T07-45-07_v1_improvedaesthetics/checkpoints/last.ckpt"
|
||||||
|
|
||||||
# only images >= 512
|
# only images >= 512 and pwatermark <= 0.4999
|
||||||
EXTRA="${EXTRA} data.params.min_size=512"
|
EXTRA="${EXTRA} data.params.min_size=512 data.params.max_pwatermark=0.4999"
|
||||||
|
|
||||||
|
# postfix
|
||||||
|
EXTRA="${EXTRA} -f v1_iahr_torch111"
|
||||||
|
|
||||||
# time to decay
|
# time to decay
|
||||||
#EXTRA="${EXTRA} model.params.scheduler_config.params.cycle_lengths=[50000] model.params.scheduler_config.params.f_min=[1e-6]"
|
#EXTRA="${EXTRA} model.params.scheduler_config.params.cycle_lengths=[50000] model.params.scheduler_config.params.f_min=[1e-6]"
|
||||||
|
|
|
@@ -1,7 +1,7 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
#SBATCH --partition=compute-od-gpu
|
#SBATCH --partition=compute-od-gpu
|
||||||
#SBATCH --job-name=stable-diffusion-v1-iahr-torch111
|
#SBATCH --job-name=stable-diffusion-v1-iahr-torch111
|
||||||
#SBATCH --nodes 20
|
#SBATCH --nodes 32
|
||||||
#SBATCH --ntasks-per-node 1
|
#SBATCH --ntasks-per-node 1
|
||||||
#SBATCH --cpus-per-gpu=4
|
#SBATCH --cpus-per-gpu=4
|
||||||
#SBATCH --gres=gpu:8
|
#SBATCH --gres=gpu:8
|
||||||
|
|
Loading…
Reference in a new issue