From cde78c3ead4211ef303926a407e566b50543abf7 Mon Sep 17 00:00:00 2001 From: rromb Date: Thu, 30 Jun 2022 00:22:21 +0200 Subject: [PATCH] inference config for 768x768 model --- ...ip-encoder-f16-768-laion-hr-inference.yaml | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 configs/stable-diffusion/txt2img-multinode-clip-encoder-f16-768-laion-hr-inference.yaml diff --git a/configs/stable-diffusion/txt2img-multinode-clip-encoder-f16-768-laion-hr-inference.yaml b/configs/stable-diffusion/txt2img-multinode-clip-encoder-f16-768-laion-hr-inference.yaml new file mode 100644 index 0000000..92ad949 --- /dev/null +++ b/configs/stable-diffusion/txt2img-multinode-clip-encoder-f16-768-laion-hr-inference.yaml @@ -0,0 +1,65 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.001 + linear_end: 0.015 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 48 + channels: 16 + cond_stage_trainable: false + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.22765929 # magic number + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. ] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 48 + in_channels: 16 + out_channels: 16 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 16 + monitor: val/rec_loss + ddconfig: + double_z: True + z_channels: 16 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: [ 1,1,2,2,4 ] # num_down = len(ch_mult)-1 + num_res_blocks: 2 + attn_resolutions: [ 16 ] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder \ No newline at end of file