From 89ac468ce849a37a97a838903cb3abe05af791e9 Mon Sep 17 00:00:00 2001 From: Patrick Esser Date: Wed, 6 Jul 2022 23:30:22 +0000 Subject: [PATCH] match num cpus to num workers, log each node separately --- scripts/slurm/resume_512/sbatch.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/slurm/resume_512/sbatch.sh b/scripts/slurm/resume_512/sbatch.sh index ac152c8..6bb87f6 100644 --- a/scripts/slurm/resume_512/sbatch.sh +++ b/scripts/slurm/resume_512/sbatch.sh @@ -3,6 +3,7 @@ #SBATCH --job-name=stable-diffusion-512cont #SBATCH --nodes=24 #SBATCH --gpus-per-node=8 +#SBATCH --cpus-per-gpu=4 #SBATCH --ntasks-per-node=1 #SBATCH --output=%x_%j.%n.out @@ -35,4 +36,4 @@ echo MASTER_ADDR=${MASTER_ADDR} echo MASTER_PORT=${MASTER_PORT} echo WORLD_SIZE=${WORLD_SIZE} -srun bash /fsx/stable-diffusion/stable-diffusion/scripts/slurm/resume_512/launcher.sh +srun --output=%x_%j.%n.out bash /fsx/stable-diffusion/stable-diffusion/scripts/slurm/resume_512/launcher.sh