From 529bf6a0cf0938eb5f54a574794cc433d02a982e Mon Sep 17 00:00:00 2001
From: Robin Rombach
Date: Sat, 9 Jul 2022 22:08:16 +0000
Subject: [PATCH] slurmy

---
Reviewer note (placed after the "---" separator, so it is not part of the
commit message):
  * launcher.sh: EXTRA now points ckpt_path at the last.ckpt of the
    2022-07-07T16-15-18 run, sets data.params.tar_base=__improvedaesthetic__
    and passes "-f _improvedaesthetic". The two earlier EXTRA settings
    (initial checkpoint, resume after crash) are kept as comments.
  * sbatch.sh: job name gets the "-improvedaesthetics" suffix and the node
    count goes from 24 to 20.
  * Line breaks of this patch were restored from a whitespace-collapsed copy;
    the blank context lines are placed according to the hunk line counts --
    confirm against the target files before applying.

 scripts/slurm/resume_512/launcher.sh | 11 ++++++++++-
 scripts/slurm/resume_512/sbatch.sh   |  4 ++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/scripts/slurm/resume_512/launcher.sh b/scripts/slurm/resume_512/launcher.sh
index d01a513..4778138 100644
--- a/scripts/slurm/resume_512/launcher.sh
+++ b/scripts/slurm/resume_512/launcher.sh
@@ -14,7 +14,16 @@ conda activate stable
 cd /fsx/stable-diffusion/stable-diffusion
 
 CONFIG=configs/stable-diffusion/txt2img-1p4B-multinode-clip-encoder-high-res-512.yaml
-EXTRA="model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/checkpoints/256f8ft512-2022-06-15-pruned.ckpt"
+
+# initial parameters
+#EXTRA="model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/checkpoints/256f8ft512-2022-06-15-pruned.ckpt"
+
+# resumed after crash
+#EXTRA="model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/logs/2022-07-06T23-43-51_txt2img-1p4B-multinode-clip-encoder-high-res-512/checkpoints/last.ckpt"
+
+# continue on improved aesthetics
+EXTRA="model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/logs/2022-07-07T16-15-18_txt2img-1p4B-multinode-clip-encoder-high-res-512/checkpoints/last.ckpt data.params.tar_base=__improvedaesthetic__ -f _improvedaesthetic"
+
 DEBUG="-d True lightning.callbacks.image_logger.params.batch_frequency=5"
 
 python main.py --base $CONFIG --gpus 0,1,2,3,4,5,6,7 -t --num_nodes ${WORLD_SIZE} --scale_lr False $EXTRA #$DEBUG
diff --git a/scripts/slurm/resume_512/sbatch.sh b/scripts/slurm/resume_512/sbatch.sh
index 6bb87f6..53818f6 100644
--- a/scripts/slurm/resume_512/sbatch.sh
+++ b/scripts/slurm/resume_512/sbatch.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #SBATCH --partition=compute-od-gpu
-#SBATCH --job-name=stable-diffusion-512cont
-#SBATCH --nodes=24
+#SBATCH --job-name=stable-diffusion-512cont-improvedaesthetics
+#SBATCH --nodes=20
 #SBATCH --gpus-per-node=8
 #SBATCH --cpus-per-gpu=4
 #SBATCH --ntasks-per-node=1