Skip to content

Arch-specific options

neosr-project edited this page Dec 6, 2024 · 48 revisions

Below are all arch-specific options. The values listed here are the defaults. You don't need to change any of these values and should use the default template configuration files, unless you have a specific use-case.

Note

If you want to train a monochrome model, you need to switch the input and output number of channels to 1, as well as use the color = "y" option in your configuration file (for both the dataset and the validation). This does not work with OTF, only paired and single. Also note that some losses will not work under those conditions (such as color_opt), and some networks can expect more channels to work properly (such as DAT and OmniSR).

Important

The option flash_attn breaks compatibility with official code. Take this into consideration in case you want to deploy your model on third-party software such as chaiNNer. rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


Discriminators

Below are supported discriminators and their parameters.


metagan

type = "metagan"
in_ch = 3
n_class = 1
dims = [48, 96, 192, 288]
blocks = [3, 3, 9, 3]
downs = [4, 4, 2, 2]
se_mode = "SSE"
mlp_ratio = 2.0
attention = true
attn_drop = 0
proj_drop = 0
head_dim = 32
drop = 0.2
sigmoid = false

ea2fpn

[network_d]
type = "ea2fpn"
class_num = 6
encoder_channels = [512, 256, 128, 64]
pyramid_channels = 64
segmentation_channels = 64
dropout = 0.2

Note

The discriminator ea2fpn uses learning rate of 1e-4 with GAN weight of 0.1. Different settings may result in incorrect convergence or NaN.


unet

[network_d]
type = "unet"
num_in_ch = 3
num_feat = 64
skip_connection = true

Note

The discriminator unet uses learning rate of 1e-4 and GAN weight of 0.1. Different settings may result in incorrect convergence or NaN.


dunet

[network_d]
type = "dunet"
in_ch = 3
dim = 64

Note

The discriminator dunet uses learning rate of 1e-4 and GAN weight of 0.1. Different settings may result in incorrect convergence or NaN.


patchgan

[network_d]
type = "patchgan"
num_in_ch = 3
num_feat = 64
num_layers = 3
max_nf_mult = 8
use_sigmoid = false
use_sn = true
#norm_type = # None

Note

The discriminator patchgan uses learning rate of 1e-4 and GAN weight of 0.1. Different settings may result in incorrect convergence or NaN.


Generators

Below are supported generators and their parameters.


atd, atd_light

[network_g]
type = "atd" # "atd_light"
norm = false
img_size = 96
patch_size = 1
in_chans = 3
embed_dim = 210
depths = [ 6, 6, 6, 6, 6, 6 ]
num_heads = [ 6, 6, 6, 6, 6, 6 ]
window_size = 16
category_size = 256
num_tokens = 128
reducted_dim = 20
convffn_kernel_size = 5
mlp_ratio = 2.0
qkv_bias = true
norm_layer = "nn.LayerNorm"
ape = false
patch_norm = true
img_range = 1.0
upsampler = "pixelshuffle"
resi_connection = "1conv"

Note

By default, ATD is not fully compatible with the original implementation. To make it compatible, use the option norm = true.


cfsr

type = "cfsr"
in_chans = 3
embed_dim = 48
depths = [6, 6]
dw_size = 9
mlp_ratio = 2.0
img_range = 1.0
upsampler = "pixelshuffledirect"
mean_norm = false

compact

[network_g]
type = "compact"
num_in_ch = 3
num_out_ch = 3
num_feat = 64
num_conv = 16
act_type = "prelu"

craft

[network_g]
type = "craft"
flash_attn = true
in_chans = 3
img_size = 64
window_size = 16
embed_dim = 48
depths = [ 2, 2, 2, 2 ]
num_heads = [ 6, 6, 6, 6 ]
split_size_0 = 4
split_size_1 = 16
mlp_ratio = 2.0
qkv_bias = true
#qk_scale = # None
img_range = 1.0
resi_connection = "1conv"

Note

The option flash_attn = true makes it incompatible with the official implementation. rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


cugan

[network_g]
type = "cugan"
in_channels = 3
out_channels = 3
pro = true

dat_small, dat_medium, dat_2

[network_g]
type = "dat_small"
#type = "dat_medium"
#type = "dat_2"
img_size = 64
in_chans = 3
embed_dim = 180
split_size = [ 2, 4 ]
depth = [ 2, 2, 2, 2 ]
num_heads = [ 2, 2, 2, 2 ]
expansion_factor = 4
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
use_chk = false
img_range = 1.0
resi_connection = "1conv"
upsampler = "pixelshuffle"

Note

rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


dct

type = "dct"
img_size = 64
in_chans = 3
embed_dim = 80
depth = [20]
num_heads = [8]
expansion_factor = 4.0
qkv_bias = true
drop_rate = 0
attn_drop_rate = 0
drop_path_rate = 0.1
act_layer = "nn.GELU"
norm_layer = "nn.LayerNorm"
img_range = 1.0
resi_connection = "3conv"
upsampler = "pixelshuffledirect"

dctlsa

[network_g]
type = "dctlsa"
in_nc = 3
out_nc = 3
nf = 55
num_modules = 6
num_head = 5

Note

Added Dropout to DCTLSA may not be compatible with official implementation.


ditn

[network_g]
type = "ditn"
patch_size = 8
inp_channels = 3
dim = 60
ITL_blocks = 4
SAL_blocks = 4
UFONE_blocks = 1
ffn_expansion_factor = 2.0
bias = false
LayerNorm_type = "WithBias"

drct, drct_l, drct_s

[network_g]
type = "drct"
#type = "drct_l"
#type = "drct_s"
img_size = 64
patch_size = 1
in_chans = 3
embed_dim = 180
depths = [ 6, 6, 6, 6, 6, 6 ]
num_heads = [ 6, 6, 6, 6, 6, 6 ]
window_size = 16
compress_ratio = 3
squeeze_factor = 30
conv_scale = 0.01
overlap_ratio = 0.5
mlp_ratio = 2.0
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
norm_layer = "nn.LayerNorm"
ape = false
patch_norm = true
img_range = 1.0
upsampler = "pixelshuffle"
resi_connection = "1conv"
gc = 32

Note

rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


flexnet, metaflexnet

type = "flexnet"
#type = "metaflexnet"
inp_channels = 3
out_channels = 3
dim = 64
num_blocks = [6,6,6,6,6,6]
window_size = 8
hidden_rate = 4
channel_norm = false
attn_drop = 0
proj_drop = 0
pipeline_type = "linear"
upsampler = "ps"
flash_attn = true

hasn

type = "hasn"
in_channels = 3
out_channels = 3
feature_channels = 52

hat_s, hat_m, hat_l

[network_g]
type = "hat_m"
#type = "hat_s"
#type = "hat_l"
window_size = 16
img_size = 64
patch_size = 1
in_chans = 3
embed_dim = 96
depths = [ 6, 6, 6, 6 ]
num_heads = [ 6, 6, 6, 6 ]
compress_ratio = 3
squeeze_factor = 30
conv_scale = 0.01
overlap_ratio = 0.5
mlp_ratio = 4.0
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
ape = false
patch_norm = true
img_range = 1.0
upsampler = "pixelshuffle"
resi_connection = "1conv"

Note

rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


hit_srf, hit_srf_medium, hit_srf_large

[network_g]
type = "hit_srf"
#type = "hit_srf_medium"
#type = "hit_srf_large"
img_size = 64
patch_size = 1
in_chans = 3
embed_dim = 60
depths = [6, 6, 6, 6]
num_heads = [6, 6, 6, 6]
base_win_size = [8, 8]
mlp_ratio = 2.0
drop_rate = 0.0
value_drop_rate = 0.0
drop_path_rate = 0.0
ape = false
patch_norm = true
use_checkpoint = false
img_range = 1.0
upsampler = "pixelshuffledirect"
resi_connection = "1conv"
hier_win_ratios = [0.5, 1, 2, 4, 6, 8]

Note

rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


hma, hma_medium, hma_large

type = "hma"
#type = "hma_medium"
#type = "hma_large"
img_size = 64
patch_size = 1
in_chans = 3
embed_dim = 60
depths = [6, 6, 6, 6]
num_heads = [6, 6, 6, 6]
window_size = 8
interval_size = 4
mlp_ratio = 2.0
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
ape = false
patch_norm = true
img_range = 1.0
upsampler = "pixelshuffle"
resi_connection = "1conv"

Note

rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


krgn

type = "krgn"
n_colors = 3
n_feats = 64
n_resgroups = 9
act = "lrelu"
rgb_range = 1.0
dilation = 3

lmlt, lmlt_tiny, lmlt_large

type = "lmlt"
#type = "lmlt_tiny"
#type = "lmlt_large"
dim = 60
n_blocks = 8
ffn_scale = 2.0
window_size = 8
drop_rate = 0
attn_drop_rate = 0
drop_path_rate = 0

man, man_tiny, man_light

type = "man"
#type = "man_tiny"
#type = "man_light"
n_resblocks = 36
n_resgroups = 1
n_colors = 3
n_feats = 180
res_scale = 1.0

mosr, mosr_t

type = "mosr"
#type = "mosr_t"
in_ch = 3
out_ch = 3
n_block = 24
dim = 64
upsampler = "ps" # "gps" "dys"
drop_path = 0.0
kernel_size = 7
expansion_ratio = 1.5
conv_ratio = 1.0

msdan

[network_g]
type = "msdan"
channels = 48
num_DFEB = 8

omnisr

[network_g]
type = "omnisr"
upsampling = 4 # value required, no defaults
window_size = 8 # value required, no defaults
num_in_ch = 3
num_out_ch = 3
num_feat = 64
res_num = 5
block_num = 1
bias = true
pe = true
ffn_bias = true

plainusr, plainusr_ultra, plainusr_large

type = "plainusr"
#type = "plainusr_ultra"
#type = "plainusr_large"
n_feat = 64
im_feat = [64, 48, 32]
attn_feat = 16

plksr, plksr_tiny

[network_g]
type = "plksr"
#type = "plksr_tiny"
dim = 64
n_blocks = 28
kernel_size = 17
split_ratio = 0.25
use_ea = true
ccm_type = "DCCM" # "CCM", "ICCM", "DCCM"
lk_type = "PLK" # "PLK", "SparsePLK", "RectSparsePLK"
sparse_kernels = [ 5, 5, 5, 5 ]
sparse_dilations = [ 1, 2, 3, 4 ]
with_idt = false

Note

The generator plksr uses learning rate of 5e-4 or lower. Different settings may result in incorrect convergence or NaN.


realplksr, realplksr_s

[network_g]
type = "realplksr"
#type = "realplksr_s"
dysample = false
dim = 64
n_blocks = 28
kernel_size = 17
split_ratio = 0.25
use_ea = true
norm_groups = 4
dropout = 0.0

Note

The generator realplksr uses learning rate of 5e-4 or lower. Different settings may result in incorrect convergence or NaN.


rgt, rgt_s

[network_g]
type = "rgt"
#type = "rgt_s"
img_size = 64
in_chans = 3
embed_dim = 180
depth = [ 6, 6, 6, 6, 6, 6, 6, 6 ]
num_heads = [ 6, 6, 6, 6, 6, 6, 6, 6 ]
mlp_ratio = 2.0
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
act_layer = "nn.GELU"
norm_layer = "nn.LayerNorm"
use_chk = false
img_range = 1.0
resi_connection = "1conv"
split_size = [ 8, 32 ]
c_ratio = 0.5

Note

rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


eimn, eimn_a, eimn_l

[network_g]
type = "eimn"
#type = "eimn_a"
#type = "eimn_l"
embed_dims = 64
depths = 1
mlp_ratios = 2.66
drop_rate = 0.0
drop_path_rate = 0.0
num_stages = 16
freeze_param = false
norm = "nn.BatchNorm2d"

esrgan

[network_g]
type = "esrgan"
num_in_ch = 3
num_out_ch = 3
num_feat = 64
num_block = 23
num_grow_ch = 32

grformer, grformer_medium, grformer_large

[network_g]
type = "grformer"
#type = "grformer_medium"
#type = "grformer_large"
img_size = 64
in_chans = 3
window_size = [8, 32]
embed_dim = 60
depths = [ 6, 6, 6, 6 ]
num_heads = [ 3, 3, 3, 3 ]
mlp_ratio = 2.0
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
norm_layer = "nn.LayerNorm"
ape = false
patch_norm = true
img_range = 1.0

safmn, safmn_l, light_safmnpp

[network_g]
type = "safmn"
#type = "safmn_l"
bcie = false
dim = 36
n_blocks = 8
ffn_scale = 2.0

span

[network_g]
type = "span"
num_in_ch = 3
num_out_ch = 3
feature_channels = 48
bias = true
norm = false
img_range = 255 # only applied if norm = true
rgb_mean = [ 0.4488, 0.4371, 0.4040 ] # only applied if norm = true

Note

By default, SPAN is not fully compatible with the original implementation. To make it compatible, use the option norm = true. rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


spanplus

[network_g]
type = "spanplus"
#type = "spanplus_sts"
#type = "spanplus_s"
#type = "spanplus_st"
num_in_ch = 3
num_out_ch = 3
blocks = [4]
feature_channels = 48
drop_rate = 0.0
upsampler = "dys"  # "lp", "ps", "conv"- only 1x

srformer_light, srformer_medium

[network_g]
type = "srformer_light"
#type = "srformer_medium"
window_size = 16
img_size = 64
patch_size = 1
in_chans = 3
embed_dim = 60
depths = [ 6, 6, 6, 6 ]
num_heads = [ 6, 6, 6, 6 ]
mlp_ratio = 2.0
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
ape = false
patch_norm = true
use_checkpoint = false
img_range = 1.0
upsampler = "pixelshuffledirect"
resi_connection = "1conv"

Note

rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


swinir_small, swinir_medium

[network_g]
type = "swinir_small"
#type = "swinir_medium"
#type = "swinir_large"
flash_attn = false
window_size = 8
img_size = 32
patch_size = 1
in_chans = 3
embed_dim = 60
depths = [ 6, 6, 6, 6 ]
num_heads = [ 6, 6, 6, 6 ]
mlp_ratio = 2
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
ape = false
patch_norm = true
use_checkpoint = false
img_range = 1.0
upsampler = "pixelshuffle"
resi_connection = "1conv"

Note

The option flash_attn = true makes it incompatible with the official implementation. rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.