diff --git a/configs/stable-diffusion/dev_mn_dummy.yaml b/configs/stable-diffusion/dev_mn_dummy.yaml new file mode 100644 index 0000000..5db89a1 --- /dev/null +++ b/configs/stable-diffusion/dev_mn_dummy.yaml @@ -0,0 +1,108 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 32 + channels: 4 + cond_stage_trainable: true + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. ] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 + in_channels: 4 + out_channels: 4 + model_channels: 32 # 320 # TODO increase + attention_resolutions: [ ] # is equal to fixed spatial resolution: 32 , 16 , 8 + num_res_blocks: 2 + channel_mult: [ 1, ] + #num_head_channels: 32 + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 32 + use_checkpoint: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ckpt_path: "models/first_stage_models/kl-f8/model.ckpt" + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.BERTEmbedder + params: + n_embed: 32 + n_layer: 1 #32 # TODO: increase + + +data: + target: main.DataModuleFromConfig + params: + batch_size: 4 + num_workers: 4 + wrap: false + train: 
import numpy as np
import random
import string
from torch.utils.data import Dataset, Subset


class DummyData(Dataset):
    """Synthetic dataset that fabricates a random array and caption per item.

    Nothing is stored or read from disk: every access generates a new sample,
    so repeated lookups of the same index return different data.

    Args:
        length: Number of items reported by ``len()``.
        size: Dimensions of the array returned under the ``"jpg"`` key; the
            sequence is unpacked into ``np.random.randn``.
    """

    def __init__(self, length, size):
        self.length = length
        self.size = size

    def __len__(self):
        """Return the configured number of items."""
        return self.length

    def __getitem__(self, i):
        """Generate a fresh sample for index ``i``.

        The index only gates the bounds check; it does not influence the
        generated content.

        Args:
            i: Item position. Negative values count from the end, as with
                built-in sequences.

        Returns:
            A dict with ``"jpg"`` mapped to an array of shape ``self.size``
            drawn from ``np.random.randn`` and ``"txt"`` mapped to a string
            of 10 random lowercase ASCII letters.

        Raises:
            IndexError: If ``i`` falls outside ``[-length, length)``.
        """
        # Without this check plain iteration (``for item in dataset``) never
        # stops: Python's sequence-iteration fallback keeps calling
        # ``__getitem__`` with 0, 1, 2, ... until IndexError is raised.
        if not -self.length <= i < self.length:
            raise IndexError(
                f"index {i} is out of range for dataset of length {self.length}"
            )
        x = np.random.randn(*self.size)
        # ``_`` avoids shadowing the index parameter ``i``.
        y = "".join(random.choice(string.ascii_lowercase) for _ in range(10))
        return {"jpg": x, "txt": y}