---
# File: stable-diffusion-finetune/models/first_stage_models/vq-f8/config.yaml
#
# Configuration for a first-stage VQ autoencoder. Each `target` is a dotted
# import path and the sibling `params` mapping holds that target's arguments.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm against the consuming loader's expected schema.

model:
  base_learning_rate: 4.5e-06
  target: ldm.models.autoencoder.VQModel
  params:
    embed_dim: 4
    n_embed: 16384
    # Metric name to monitor; presumably used for checkpoint selection.
    monitor: val/rec_loss

    # Encoder/decoder architecture settings.
    ddconfig:
      double_z: false
      z_channels: 4
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      # Four multiplier levels; presumably three downsampling steps (factor 8,
      # matching "vq-f8" in the file path) - TODO confirm.
      ch_mult:
        - 1
        - 2
        - 2
        - 4
      num_res_blocks: 2
      attn_resolutions:
        - 32
      dropout: 0.0

    # Training loss and its discriminator settings.
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 3
        disc_num_layers: 2
        disc_start: 1
        disc_weight: 0.6
        codebook_weight: 1.0

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 10
    num_workers: 20
    wrap: true

    # Train and validation use the same size/crop_size values.
    train:
      target: ldm.data.openimages.FullOpenImagesTrain
      params:
        size: 384
        crop_size: 256

    validation:
      target: ldm.data.openimages.FullOpenImagesValidation
      params:
        size: 384
        crop_size: 256