From c229c115c1b3cabe3ea22ecb824e4009cd67caed Mon Sep 17 00:00:00 2001 From: Patrick Esser Date: Tue, 2 Aug 2022 20:32:22 +0000 Subject: [PATCH] print ema steps and resume v2 pretraining --- scripts/printckpt.py | 2 ++ scripts/slurm/v2_pretraining/launcher.sh | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/printckpt.py b/scripts/printckpt.py index b9824d7..8821524 100644 --- a/scripts/printckpt.py +++ b/scripts/printckpt.py @@ -10,6 +10,8 @@ def printit(p): sd = torch.load(p, map_location="cpu") if "global_step" in sd: print(f"This is global step {sd['global_step']}.") + if "model_ema.num_updates" in sd["state_dict"]: + print(f"And we got {sd['state_dict']['model_ema.num_updates']} EMA updates.") if __name__ == "__main__": diff --git a/scripts/slurm/v2_pretraining/launcher.sh b/scripts/slurm/v2_pretraining/launcher.sh index 6dffd49..a582509 100755 --- a/scripts/slurm/v2_pretraining/launcher.sh +++ b/scripts/slurm/v2_pretraining/launcher.sh @@ -24,8 +24,8 @@ cd /fsx/stable-diffusion/stable-diffusion CONFIG=configs/stable-diffusion/v2_pretraining.yaml # resume and set new seed to reshuffle data -EXTRA="--seed 542 model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/checkpoints/v2-256/216k-256.ckpt" -#EXTRA="--seed 543 --resume_from_checkpoint ..." +#EXTRA="--seed 542 model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/checkpoints/v2-256/216k-256.ckpt" +EXTRA="--seed 543 --resume_from_checkpoint /fsx/stable-diffusion/stable-diffusion/logs/2022-07-31T23-35-31_v2_pretraining/checkpoints/last.ckpt" # reduce lr a bit #EXTRA="${EXTRA} model.params.scheduler_config.params.f_max=[0.75]"