StableSR
StableSR copied to clipboard
How many epochs should I train for fine-tuning?
Dear Author,
Thanks for sharing this great work.
I wonder how many epochs you would recommend for fine-tuning?
I have a custom training dataset of about 100 images.
I'm training the data as below.
While checking the logged images during training, the image quality at 6000 epochs does not seem very good.
Should I train for more epochs, or change some other parameters?
# Super-resolution scale factor (x4 upscaling).
sf: 4
model:
  base_learning_rate: 5.0e-05
  target: ldm.models.diffusion.ddpm.LatentDiffusionSRTextWT
  params:
    # parameterization: "v"
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: caption
    image_size: 512
    channels: 4
    cond_stage_trainable: false  # Note: different from the one we trained before
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: false
    # for training only
    ckpt_path: '/data2/LHC/StableSR/stablesr_000117.ckpt'
    unfrozen_diff: false
    random_size: false
    time_replace: 1000
    use_usm: true
    # P2 weighting, we do not use in final version
    p2_gamma: null
    p2_k: null
    # ignore_keys: []
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModelDualcondV2
      params:
        image_size: 32  # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        transformer_depth: 1
        context_dim: 1024
        use_checkpoint: false
        legacy: false
        semb_channels: 256
    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        # for training only
        ckpt_path: '/data2/LHC/StableSR/stablesr_000117.ckpt'
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 512
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 2
            - 4
            - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        # Identity loss: the autoencoder is frozen during fine-tuning.
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: true
        layer: "penultimate"
    structcond_stage_config:
      target: ldm.modules.diffusionmodules.openaimodel.EncoderUNetModelWT
      params:
        image_size: 96
        in_channels: 4
        model_channels: 256
        out_channels: 256
        num_res_blocks: 2
        attention_resolutions: [4, 2, 1]
        dropout: 0
        channel_mult: [1, 1, 2, 2]
        conv_resample: true
        dims: 2
        use_checkpoint: false
        use_fp16: false
        num_heads: 4
        num_head_channels: -1
        num_heads_upsample: -1
        use_scale_shift_norm: false
        resblock_updown: false
        use_new_attention_order: false
# Real-ESRGAN-style two-stage degradation pipeline used to synthesize LR inputs.
degradation:
  # the first degradation process
  resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
  resize_range: [0.3, 1.5]
  gaussian_noise_prob: 0.5
  noise_range: [1, 15]
  poisson_scale_range: [0.05, 2.0]
  gray_noise_prob: 0.4
  jpeg_range: [60, 95]
  # the second degradation process
  second_blur_prob: 0.5
  resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
  resize_range2: [0.6, 1.2]
  gaussian_noise_prob2: 0.5
  noise_range2: [1, 12]
  poisson_scale_range2: [0.05, 1.0]
  gray_noise_prob2: 0.4
  jpeg_range2: [60, 100]
  gt_size: 512
  no_degradation_prob: 0.01
# Training/validation data module; both splits use the RealESRGAN dataset loader.
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 6
    num_workers: 10
    wrap: false
    train:
      target: basicsr.data.realesrgan_dataset.RealESRGANDataset
      params:
        queue_size: 180
        gt_path: '/data2/LHC/StableSR/StableSR/DNA_512/train'
        face_gt_path: '/mnt/lustre/share/jywang/dataset/FFHQ/1024/'
        num_face: 10000
        crop_size: 512
        io_backend:
          type: disk
        # first-stage blur kernel settings
        blur_kernel_size: 21
        kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
        kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
        sinc_prob: 0.1
        blur_sigma: [0.2, 1.5]
        betag_range: [0.5, 2.0]
        betap_range: [1, 1.5]
        # second-stage blur kernel settings
        blur_kernel_size2: 11
        kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
        kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
        sinc_prob2: 0.1
        blur_sigma2: [0.2, 1.0]
        betag_range2: [0.5, 2.0]
        betap_range2: [1, 1.5]
        final_sinc_prob: 0.8
        gt_size: 512
        use_hflip: true
        use_rot: false
    validation:
      target: basicsr.data.realesrgan_dataset.RealESRGANDataset
      params:
        gt_path: '/data2/LHC/StableSR/StableSR/DNA_512/val'
        crop_size: 512
        io_backend:
          type: disk
        blur_kernel_size: 21
        kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
        kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
        sinc_prob: 0.1
        blur_sigma: [0.2, 1.5]
        betag_range: [0.5, 2.0]
        betap_range: [1, 1.5]
        blur_kernel_size2: 11
        kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
        kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
        sinc_prob2: 0.1
        blur_sigma2: [0.2, 1.0]
        betag_range2: [0.5, 2.0]
        betap_range2: [1, 1.5]
        final_sinc_prob: 0.8
        gt_size: 512
        use_hflip: true
        use_rot: false
# Test-time data module (batch size 1, same degradation settings as training).
test_data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 1
    num_workers: 6
    wrap: false
    test:
      target: basicsr.data.realesrgan_dataset.RealESRGANDataset
      params:
        gt_path: '/data2/LHC/StableSR/StableSR/DNA_512/test'
        crop_size: 512
        io_backend:
          type: disk
        blur_kernel_size: 21
        kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
        kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
        sinc_prob: 0.1
        blur_sigma: [0.2, 1.5]
        betag_range: [0.5, 2.0]
        betap_range: [1, 1.5]
        blur_kernel_size2: 11
        kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
        kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
        sinc_prob2: 0.1
        blur_sigma2: [0.2, 1.0]
        betag_range2: [0.5, 2.0]
        betap_range2: [1, 1.5]
        final_sinc_prob: 0.8
        gt_size: 512
        use_hflip: true
        use_rot: true
# PyTorch Lightning settings: checkpoint/image-log every 1500 steps,
# 100k total optimizer steps with 4x gradient accumulation.
lightning:
  modelcheckpoint:
    params:
      every_n_train_steps: 1500
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 1500
        max_images: 4
        increase_log_steps: false
  trainer:
    benchmark: true
    max_steps: 100000
    accumulate_grad_batches: 4