From 1dba1b17eee0b677a5fc551da43fe524f4f622eb Mon Sep 17 00:00:00 2001 From: root Date: Tue, 31 May 2022 13:18:23 +0000 Subject: [PATCH] dev it --- .../txt2img-clip-encoder-dev.yaml | 5 +- .../txt2img-ldm-frozen-dev.yaml | 129 ++++++++++++++++++ .../txt2img-ldm-unfrozen-dev.yaml | 129 ++++++++++++++++++ .../txt2img-t5-encoder-dev.yaml | 4 +- 4 files changed, 263 insertions(+), 4 deletions(-) create mode 100644 configs/stable-diffusion/txt2img-ldm-frozen-dev.yaml create mode 100644 configs/stable-diffusion/txt2img-ldm-unfrozen-dev.yaml diff --git a/configs/stable-diffusion/txt2img-clip-encoder-dev.yaml b/configs/stable-diffusion/txt2img-clip-encoder-dev.yaml index 24a9734..313998c 100644 --- a/configs/stable-diffusion/txt2img-clip-encoder-dev.yaml +++ b/configs/stable-diffusion/txt2img-clip-encoder-dev.yaml @@ -74,9 +74,9 @@ data: target: ldm.data.laion.WebDataModuleFromConfig params: tar_base: "pipe:aws s3 cp s3://s-datasets/laion5b/laion2B-data/" - batch_size: 12 + batch_size: 56 num_workers: 4 - multinode: False + multinode: True train: shards: '{000000..231317}.tar -' shuffle: 10000 @@ -121,6 +121,7 @@ lightning: benchmark: True val_check_interval: 50000 num_sanity_val_steps: 0 + accumulate_grad_batches: 2 diff --git a/configs/stable-diffusion/txt2img-ldm-frozen-dev.yaml b/configs/stable-diffusion/txt2img-ldm-frozen-dev.yaml new file mode 100644 index 0000000..443db68 --- /dev/null +++ b/configs/stable-diffusion/txt2img-ldm-frozen-dev.yaml @@ -0,0 +1,129 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 32 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. ] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 1280 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ckpt_path: "models/first_stage_models/kl-f8/model.ckpt" + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.BERTEmbedder + params: + n_embed: 1280 + n_layer: 32 + + +data: + target: ldm.data.laion.WebDataModuleFromConfig + params: + tar_base: "pipe:aws s3 cp s3://s-datasets/laion5b/laion2B-data/" + batch_size: 52 + num_workers: 4 + multinode: False + train: + shards: '{000000..231317}.tar -' + shuffle: 10000 + image_key: jpg + image_transforms: + - target: torchvision.transforms.Resize + params: + size: 256 + interpolation: 3 + - target: torchvision.transforms.RandomCrop + params: + size: 256 + + # NOTE use enough shards to avoid empty validation loops in workers + validation: + shards: '{231318..231349}.tar -' + shuffle: 0 + image_key: jpg + image_transforms: + - target: torchvision.transforms.Resize + params: + size: 256 + interpolation: 3 + - target: torchvision.transforms.CenterCrop + params: + size: 256 + + +lightning: + callbacks: + image_logger: + target: main.ImageLogger + params: + batch_frequency: 5000 + max_images: 8 + increase_log_steps: False + log_first_step: False + + + trainer: + #replace_sampler_ddp: False + benchmark: True + val_check_interval: 50000 + num_sanity_val_steps: 0 + + + diff --git a/configs/stable-diffusion/txt2img-ldm-unfrozen-dev.yaml b/configs/stable-diffusion/txt2img-ldm-unfrozen-dev.yaml new file mode 100644 index 0000000..317109e --- /dev/null +++ b/configs/stable-diffusion/txt2img-ldm-unfrozen-dev.yaml @@ -0,0 +1,129 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 32 + channels: 4 + cond_stage_trainable: true + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. ] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 1280 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ckpt_path: "models/first_stage_models/kl-f8/model.ckpt" + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.BERTEmbedder + params: + n_embed: 1280 + n_layer: 32 + + +data: + target: ldm.data.laion.WebDataModuleFromConfig + params: + tar_base: "pipe:aws s3 cp s3://s-datasets/laion5b/laion2B-data/" + batch_size: 12 + num_workers: 4 + multinode: False + train: + shards: '{000000..231317}.tar -' + shuffle: 10000 + image_key: jpg + image_transforms: + - target: torchvision.transforms.Resize + params: + size: 256 + interpolation: 3 + - target: torchvision.transforms.RandomCrop + params: + size: 256 + + # NOTE use enough shards to avoid empty validation loops in workers + validation: + shards: '{231318..231349}.tar -' + shuffle: 0 + image_key: jpg + image_transforms: + - target: torchvision.transforms.Resize + params: + size: 256 + interpolation: 3 + - target: torchvision.transforms.CenterCrop + params: + size: 256 + + +lightning: + callbacks: + image_logger: + target: main.ImageLogger + params: + batch_frequency: 5000 + max_images: 8 + increase_log_steps: False + log_first_step: False + + + trainer: + #replace_sampler_ddp: False + benchmark: True + val_check_interval: 50000 + num_sanity_val_steps: 0 + + + diff --git a/configs/stable-diffusion/txt2img-t5-encoder-dev.yaml b/configs/stable-diffusion/txt2img-t5-encoder-dev.yaml index 2770641..e87de0d 100644 --- a/configs/stable-diffusion/txt2img-t5-encoder-dev.yaml +++ b/configs/stable-diffusion/txt2img-t5-encoder-dev.yaml @@ -38,7 +38,7 @@ model: num_heads: 8 use_spatial_transformer: True transformer_depth: 1 - context_dim: 768 + context_dim: 2048 use_checkpoint: True legacy: False @@ -76,7 +76,7 @@ data: target: ldm.data.laion.WebDataModuleFromConfig params: tar_base: "pipe:aws s3 cp s3://s-datasets/laion5b/laion2B-data/" - batch_size: 12 + batch_size: 40 num_workers: 4 multinode: False train: