Merge pull request #16 from the-database/dev

cast float32 for more losses, add missing scales for configs

the-database authored Jun 28, 2024
2 parents 85b18eb + c819b6a, commit cc02af3

Showing 15 changed files with 24 additions and 17 deletions.

options/train/Compact/Compact.yml (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 # General Settings
 ####################
 name: 4x_Compact
-scale: 4 # 1, 2, 4
+scale: 4 # 1, 2, 3, 4, 8
 use_amp: true # Speed up training and reduce VRAM usage.
 amp_bf16: true # Use bf16 for AMP, RTX 3000 series or newer only.
 fast_matmul: false # Trade precision for performance.

options/train/Compact/Compact_OTF.yml (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 # General Settings
 ####################
 name: 4x_Compact_OTF
-scale: 4 # 1, 2, 4
+scale: 4 # 1, 2, 3, 4, 8
 use_amp: true # Speed up training and reduce VRAM usage.
 amp_bf16: true # Use bf16 for AMP, RTX 3000 series or newer only.
 fast_matmul: false # Trade precision for performance.

options/train/DAT/DAT.yml (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 # General Settings
 ####################
 name: 4x_DAT_2
-scale: 4 # 2, 3, 4
+scale: 4 # 1, 2, 3, 4, 8
 use_amp: true # Speed up training and reduce VRAM usage.
 amp_bf16: true # Use bf16 for AMP, RTX 3000 series or newer only.
 fast_matmul: false # Trade precision for performance.

options/train/DAT/DAT_OTF.yml (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 # General Settings
 ####################
 name: 4x_DAT_2_OTF
-scale: 4 # 2, 3, 4
+scale: 4 # 1, 2, 3, 4, 8
 use_amp: true # Speed up training and reduce VRAM usage.
 amp_bf16: true # Use bf16 for AMP, RTX 3000 series or newer only.
 fast_matmul: false # Trade precision for performance.

options/train/OmniSR/OmniSR.yml (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 # General Settings
 ####################
 name: 4x_OmniSR
-scale: 4 # 2, 3, 4
+scale: 4 # 1, 2, 3, 4, 8
 use_amp: true # Speed up training and reduce VRAM usage.
 amp_bf16: true # Use bf16 for AMP, RTX 3000 series or newer only.
 fast_matmul: false # Trade precision for performance.

options/train/OmniSR/OmniSR_OTF.yml (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 # General Settings
 ####################
 name: 4x_OmniSR_OTF
-scale: 4 # 2, 3, 4
+scale: 4 # 1, 2, 3, 4, 8
 use_amp: true # Speed up training and reduce VRAM usage.
 amp_bf16: true # Use bf16 for AMP, RTX 3000 series or newer only.
 fast_matmul: false # Trade precision for performance.

options/train/PLKSR/PLKSR.yml (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 # General Settings
 ####################
 name: 4x_PLKSR
-scale: 4 # 2, 3, 4
+scale: 4 # 1, 2, 3, 4, 8
 use_amp: true # Speed up training and reduce VRAM usage.
 amp_bf16: true # Use bf16 for AMP, RTX 3000 series or newer only.
 fast_matmul: false # Trade precision for performance.

options/train/PLKSR/PLKSR_OTF.yml (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 # General Settings
 ####################
 name: 4x_PLKSR_OTF
-scale: 4 # 2, 3, 4
+scale: 4 # 1, 2, 3, 4, 8
 use_amp: true # Speed up training and reduce VRAM usage.
 amp_bf16: true # Use bf16 for AMP, RTX 3000 series or newer only.
 fast_matmul: false # Trade precision for performance.

options/train/RealPLKSR/RealPLKSR.yml (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 # General Settings
 ####################
 name: 4x_RealPLKSR
-scale: 4 # 2, 3, 4
+scale: 4 # 1, 2, 3, 4, 8
 use_amp: true # Speed up training and reduce VRAM usage.
 amp_bf16: true # Use bf16 for AMP, RTX 3000 series or newer only.
 fast_matmul: false # Trade precision for performance.

options/train/RealPLKSR/RealPLKSR_OTF.yml (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 # General Settings
 ####################
 name: 4x_RealPLKSR_OTF
-scale: 4 # 2, 3, 4
+scale: 4 # 1, 2, 3, 4, 8
 use_amp: true # Speed up training and reduce VRAM usage.
 amp_bf16: true # Use bf16 for AMP, RTX 3000 series or newer only.
 fast_matmul: false # Trade precision for performance.

scripts/options/generate_default_options.py (13 changes: 6 additions & 7 deletions)
@@ -10,7 +10,6 @@ class ArchInfo(TypedDict):
 
 
 ALL_SCALES = [1, 2, 3, 4, 8]
-SCALES_234 = [2, 3, 4]
 
 
 def final_template(template: str, arch: ArchInfo) -> str:
@@ -46,19 +45,19 @@ def final_template(template: str, arch: ArchInfo) -> str:
         "extras": {"use_pixel_unshuffle": "true"},
     },
     {"names": ["ATD"], "scales": ALL_SCALES},
-    {"names": ["DAT_2"], "scales": SCALES_234},
+    {"names": ["DAT_2"], "scales": ALL_SCALES},
     {"names": ["HAT_L", "HAT_M", "HAT_S"], "scales": ALL_SCALES},
-    {"names": ["OmniSR"], "scales": SCALES_234},
-    {"names": ["PLKSR"], "scales": SCALES_234},
-    {"names": ["RealPLKSR"], "scales": SCALES_234},
+    {"names": ["OmniSR"], "scales": ALL_SCALES},
+    {"names": ["PLKSR"], "scales": ALL_SCALES},
+    {"names": ["RealPLKSR"], "scales": ALL_SCALES},
     {
         "names": ["RealCUGAN"],
-        "scales": SCALES_234,
+        "scales": [2, 3, 4],
         "extras": {"pro": "true", "fast": "false"},
     },
     {"names": ["SPAN"], "scales": [2, 4]},
     {"names": ["SRFormer", "SRFormer_light"], "scales": ALL_SCALES},
-    {"names": ["Compact", "UltraCompact", "SuperUltraCompact"], "scales": [1, 2, 4]},
+    {"names": ["Compact", "UltraCompact", "SuperUltraCompact"], "scales": ALL_SCALES},
    {"names": ["SwinIR_L", "SwinIR_M", "SwinIR_S"], "scales": ALL_SCALES},
     {"names": ["RGT", "RGT_S"], "scales": ALL_SCALES},
     {"names": ["DRCT", "DRCT_L", "DRCT_XL"], "scales": ALL_SCALES},

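For context, the table above drives the generated YAML configs: each entry's "scales" list controls which scale variants an architecture supports, which is where the updated "# 1, 2, 3, 4, 8" comments in the configs above come from. A minimal sketch of that relationship (the scale_comment helper is hypothetical; the real final_template body is not shown in this diff):

from typing import TypedDict


# Simplified stand-in for the ArchInfo TypedDict defined in the script above.
class ArchInfo(TypedDict, total=False):
    names: list[str]
    scales: list[int]
    extras: dict[str, str]


ALL_SCALES = [1, 2, 3, 4, 8]


def scale_comment(scales: list[int]) -> str:
    # Render the supported-scale comment seen in the YAML configs,
    # e.g. [1, 2, 3, 4, 8] -> "1, 2, 3, 4, 8".
    return ", ".join(str(s) for s in scales)


# After this commit DAT_2 advertises every scale, so its generated
# config carries the full comment:
arch: ArchInfo = {"names": ["DAT_2"], "scales": ALL_SCALES}
print(f"scale: 4 # {scale_comment(arch['scales'])}")  # scale: 4 # 1, 2, 3, 4, 8
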
traiNNer/losses/basic_loss.py (4 changes: 4 additions & 0 deletions)
@@ -122,6 +122,7 @@ def __init__(
         self.reduction = reduction
         self.eps = eps
 
+    @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
     def forward(
         self, pred: Tensor, target: Tensor, weight: Tensor | None = None, **kwargs
     ) -> Tensor:
@@ -156,6 +157,7 @@ def __init__(
         else:
             raise NotImplementedError(f"{criterion} criterion has not been supported.")
 
+    @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
     def forward(self, x: Tensor, y: Tensor) -> Tensor:
         input_yuv = rgb2ycbcr_pt(x)
         target_yuv = rgb2ycbcr_pt(y)
@@ -218,6 +220,7 @@ def __init__(
         else:
             raise NotImplementedError(f"{criterion} criterion has not been supported.")
 
+    @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
     def forward(self, x: Tensor, y: Tensor) -> Tensor:
         return self.criterion(self.ds_f(x), self.ds_f(y)) * self.loss_weight
 
@@ -238,6 +241,7 @@ def __init__(self, criterion: str = "l1", loss_weight: float = 1.0) -> None:
         else:
             raise NotImplementedError(f"{criterion} criterion has not been supported.")
 
+    @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
     def forward(self, x: Tensor, y: Tensor) -> Tensor:
         x_luma = rgb_to_luma(x)
         y_luma = rgb_to_luma(y)

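The decorator added in the hunks above (and in the loss files below) is PyTorch's torch.cuda.amp.custom_fwd. With cast_inputs=torch.float32, any floating-point CUDA tensor arguments are cast to float32 and autocast is locally disabled for the duration of the call, so the loss arithmetic runs in full precision even when training under AMP (use_amp: true in the configs above). A minimal standalone sketch of the effect (the L1CastLoss module is hypothetical, not from this repo):

import torch
from torch import Tensor, nn


class L1CastLoss(nn.Module):
    @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
    def forward(self, pred: Tensor, target: Tensor) -> Tensor:
        # Inside this call autocast is disabled, and any float16/bfloat16
        # CUDA inputs would arrive already cast to float32, so the
        # reduction accumulates in full precision.
        return (pred - target).abs().mean()


loss_fn = L1CastLoss().cuda()
x = torch.randn(1, 3, 32, 32, device="cuda")
y = torch.randn(1, 3, 32, 32, device="cuda")
with torch.autocast("cuda", dtype=torch.bfloat16):
    loss = loss_fn(x, y)
print(loss.dtype)  # torch.float32, despite the bf16 autocast region
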
traiNNer/losses/contextual_loss.py (1 change: 1 addition & 0 deletions)
@@ -87,6 +87,7 @@ def __init__(
         else:  # if calc_type == 'regular':
             self.calculate_loss = self.calculate_cx_loss
 
+    @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
     def forward(self, images: Tensor, gt: Tensor) -> Tensor:
         device = images.device
 

traiNNer/losses/gan_loss.py (2 changes: 2 additions & 0 deletions)
@@ -93,6 +93,7 @@ def get_target_label(self, input: Tensor, target_is_real: bool) -> Tensor | bool
         target_val = self.real_label_val if target_is_real else self.fake_label_val
         return input.new_ones(input.size()) * target_val
 
+    @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
     def forward(
         self, input: Tensor, target_is_real: bool, is_disc: bool = False
     ) -> Tensor:
@@ -137,6 +138,7 @@ def __init__(
     ) -> None:
         super().__init__(gan_type, real_label_val, fake_label_val, loss_weight)
 
+    @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
     def forward(
         self, input: Tensor | list[Tensor], target_is_real: bool, is_disc: bool = False
     ) -> Tensor:

traiNNer/losses/perceptual_loss.py (1 change: 1 addition & 0 deletions)
@@ -75,6 +75,7 @@ def __init__(
         else:
             raise NotImplementedError(f"{criterion} criterion has not been supported.")
 
+    @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
     def forward(self, x: Tensor, gt: Tensor) -> tuple[Tensor | None, Tensor | None]:
         """Forward function.
