From ec8442906f59051813caa7ea536455a68bf92943 Mon Sep 17 00:00:00 2001 From: Patrick Esser Date: Tue, 26 Jul 2022 08:23:46 +0000 Subject: [PATCH] v3 reduced lr --- scripts/checker.py | 5 +++++ scripts/slurm/v3_pretraining/launcher.sh | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/checker.py b/scripts/checker.py index 53e360e..8b94a5b 100644 --- a/scripts/checker.py +++ b/scripts/checker.py @@ -157,6 +157,11 @@ class Checker(object): def check(self): while True: + if not os.path.exists(self.filename): + print(f"Could not find {self.filename}. Waiting.") + time.sleep(self.interval) + continue + stamp = os.stat(self.filename).st_mtime if stamp != self._cached_stamp: while True: diff --git a/scripts/slurm/v3_pretraining/launcher.sh b/scripts/slurm/v3_pretraining/launcher.sh index 38d04b8..1aff7d9 100755 --- a/scripts/slurm/v3_pretraining/launcher.sh +++ b/scripts/slurm/v3_pretraining/launcher.sh @@ -27,7 +27,10 @@ CONFIG=configs/stable-diffusion/v3_pretraining.yaml #EXTRA="--seed 714 model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/rlogs/2022-07-11T22-57-10_txt2img-v2-clip-encoder-improved_aesthetics-256/checkpoints/last.ckpt" #EXTRA="--seed 715 --resume_from_checkpoint /fsx/stable-diffusion/stable-diffusion/logs/2022-07-14T21-03-49_txt2img-v2-clip-encoder-improved_aesthetics-256/checkpoints/last.ckpt" #EXTRA="--seed 716 --resume_from_checkpoint /fsx/stable-diffusion/stable-diffusion/logs/2022-07-22T09-25-26_v3_pretraining/checkpoints/last.ckpt" -EXTRA="--seed 717 --resume_from_checkpoint /fsx/stable-diffusion/stable-diffusion/logs/2022-07-24T06-15-08_v3_pretraining/checkpoints/last.ckpt" +EXTRA="--seed 718 --resume_from_checkpoint /fsx/stable-diffusion/stable-diffusion/logs/2022-07-24T06-15-08_v3_pretraining/checkpoints/last.ckpt" + +# reduce lr a bit +EXTRA="${EXTRA} model.params.scheduler_config.params.f_max=[0.75]" # custom logdir #EXTRA="${EXTRA} --logdir rlogs"