match num cpus to num workers, log each node separately

This commit is contained in:
Patrick Esser 2022-07-06 23:30:22 +00:00 committed by pesser
parent 9300c0ccfc
commit 89ac468ce8

View file

@@ -3,6 +3,7 @@
#SBATCH --job-name=stable-diffusion-512cont #SBATCH --job-name=stable-diffusion-512cont
#SBATCH --nodes=24 #SBATCH --nodes=24
#SBATCH --gpus-per-node=8 #SBATCH --gpus-per-node=8
#SBATCH --cpus-per-gpu=4
#SBATCH --ntasks-per-node=1 #SBATCH --ntasks-per-node=1
#SBATCH --output=%x_%j.%n.out #SBATCH --output=%x_%j.%n.out
@@ -35,4 +36,4 @@ echo MASTER_ADDR=${MASTER_ADDR}
echo MASTER_PORT=${MASTER_PORT} echo MASTER_PORT=${MASTER_PORT}
echo WORLD_SIZE=${WORLD_SIZE} echo WORLD_SIZE=${WORLD_SIZE}
srun bash /fsx/stable-diffusion/stable-diffusion/scripts/slurm/resume_512/launcher.sh srun --output=%x_%j.%n.out bash /fsx/stable-diffusion/stable-diffusion/scripts/slurm/resume_512/launcher.sh