Add support for FFTformer (#171)
RunDevelopment authored Feb 22, 2024
1 parent 746f88d commit c793221
Showing 7 changed files with 591 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -102,6 +102,7 @@ Spandrel currently supports a limited amount of network architectures. If the ar
- [KBNet](https://github.com/zhangyi-3/KBNet) | [Models](https://mycuhk-my.sharepoint.com/personal/1155135732_link_cuhk_edu_hk/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2F1155135732%5Flink%5Fcuhk%5Fedu%5Fhk%2FDocuments%2Fshare%2FKBNet%2FDenoising%2Fpretrained%5Fmodels)
- [NAFNet](https://github.com/megvii-research/NAFNet) | [Models](https://github.com/megvii-research/NAFNet#results-and-pre-trained-models)
- [Restormer](https://github.com/swz30/Restormer) | [Models](https://github.com/swz30/Restormer/releases/tag/v1.0)
- [FFTformer](https://github.com/kkkls/FFTformer) | [Models](https://github.com/kkkls/FFTformer/releases/tag/pretrain_model)

#### DeJPEG

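With the architecture registered (see the registry change below), a checkpoint from the linked FFTformer release should load through spandrel's usual entry point. A minimal sketch, assuming the public `ModelLoader` API; the file name is illustrative, not part of this commit:

```python
# Minimal sketch: auto-detecting and loading an FFTformer checkpoint.
# Assumes spandrel's public ModelLoader API; the checkpoint path is illustrative.
from spandrel import ModelLoader

descriptor = ModelLoader().load_from_file("fftformer_GoPro.pth")

assert descriptor.architecture.id == "FFTformer"
print(descriptor.purpose, descriptor.tags, descriptor.input_channels)
```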
2 changes: 2 additions & 0 deletions src/spandrel/__helpers/main_registry.py
@@ -18,6 +18,7 @@
Compact,
DDColor,
FeMaSR,
FFTformer,
KBNet,
LaMa,
MMRealSR,
@@ -71,6 +72,7 @@
ArchSupport.from_architecture(RealCUGAN.RealCUGANArch()),
ArchSupport.from_architecture(DDColor.DDColorArch()),
ArchSupport.from_architecture(SAFMN.SAFMNArch()),
ArchSupport.from_architecture(FFTformer.FFTformerArch()),
ArchSupport.from_architecture(NAFNet.NAFNetArch()),
ArchSupport.from_architecture(Restormer.RestormerArch()),
ArchSupport.from_architecture(ESRGAN.ESRGANArch()),
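Ordering in this registry can matter when architectures share key names: as far as this diff shows, the loader walks the `ArchSupport` entries and uses the first architecture whose detection condition matches the state dict. A rough sketch of that detection step, assuming `Architecture` exposes `detect()` and `load()` methods backed by the `KeyCondition` passed in `__init__` (see the new file below); the checkpoint path is again illustrative:

```python
# Rough sketch of key-based detection, not the loader's actual control flow.
# Assumes Architecture exposes detect(state_dict) and load(state_dict).
import torch

from spandrel.architectures.FFTformer import FFTformerArch

state_dict = torch.load("fftformer_GoPro.pth", map_location="cpu")

arch = FFTformerArch()
if arch.detect(state_dict):  # KeyCondition.has_all(...) over the keys below
    descriptor = arch.load(state_dict)
    print(descriptor.tags)
```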
100 changes: 100 additions & 0 deletions src/spandrel/architectures/FFTformer/__init__.py
@@ -0,0 +1,100 @@
from __future__ import annotations

from typing_extensions import override

from spandrel.util import KeyCondition, get_seq_len

from ...__helpers.model_descriptor import (
Architecture,
ImageModelDescriptor,
SizeRequirements,
StateDict,
)
from .arch.fftformer_arch import FFTformer


class FFTformerArch(Architecture[FFTformer]):
def __init__(self) -> None:
super().__init__(
id="FFTformer",
detect=KeyCondition.has_all(
"patch_embed.proj.weight",
"encoder_level1.0.norm2.body.weight",
"encoder_level1.0.norm2.body.bias",
"encoder_level1.0.ffn.fft",
"encoder_level1.0.ffn.project_in.weight",
"encoder_level1.0.ffn.dwconv.weight",
"encoder_level1.0.ffn.project_out.weight",
"down1_2.body.1.weight",
"encoder_level2.0.ffn.fft",
"down2_3.body.1.weight",
"encoder_level3.0.ffn.fft",
"decoder_level3.0.attn.to_hidden.weight",
"decoder_level3.0.attn.norm.body.weight",
"up3_2.body.1.weight",
"reduce_chan_level2.weight",
"decoder_level2.0.attn.to_hidden.weight",
"up2_1.body.1.weight",
"decoder_level1.0.attn.to_hidden.weight",
"refinement.0.norm1.body.weight",
"refinement.0.attn.to_hidden.weight",
"refinement.0.ffn.fft",
"fuse2.att_channel.norm2.body.weight",
"fuse2.att_channel.ffn.fft",
"fuse2.conv.weight",
"fuse1.att_channel.norm2.body.weight",
"fuse1.att_channel.ffn.fft",
"fuse1.conv.weight",
"output.weight",
),
)

@override
def load(self, state_dict: StateDict) -> ImageModelDescriptor[FFTformer]:
inp_channels = 3
out_channels = 3
dim = 48
num_blocks = [6, 6, 12]
num_refinement_blocks = 4
ffn_expansion_factor = 3
bias = False

inp_channels = state_dict["patch_embed.proj.weight"].shape[1]
out_channels = state_dict["output.weight"].shape[0]
dim = state_dict["patch_embed.proj.weight"].shape[0]

num_blocks[0] = get_seq_len(state_dict, "encoder_level1")
num_blocks[1] = get_seq_len(state_dict, "encoder_level2")
num_blocks[2] = get_seq_len(state_dict, "encoder_level3")

num_refinement_blocks = get_seq_len(state_dict, "refinement")

# hidden_dim = int(dim * ffn_expansion_factor)
hidden_dim = state_dict["encoder_level1.0.ffn.project_out.weight"].shape[1]
ffn_expansion_factor = hidden_dim / dim

bias = "encoder_level1.0.ffn.project_in.bias" in state_dict

model = FFTformer(
inp_channels=inp_channels,
out_channels=out_channels,
dim=dim,
num_blocks=num_blocks,
num_refinement_blocks=num_refinement_blocks,
ffn_expansion_factor=ffn_expansion_factor,
bias=bias,
)

return ImageModelDescriptor(
model,
state_dict,
architecture=self,
purpose="Restoration",
tags=[f"{dim}dim"],
supports_half=False, # TODO: verify
supports_bfloat16=True,
scale=1,
input_channels=inp_channels,
output_channels=out_channels,
size_requirements=SizeRequirements(multiple_of=32),
)
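Everything the constructor needs is recovered from the checkpoint itself: `inp_channels`, `out_channels`, and `dim` come from tensor shapes, the per-level block counts from the length of each sequential prefix, and `ffn_expansion_factor` is back-computed from the width of `project_out` (e.g. a hidden width of 144 with `dim = 48` gives a factor of 3.0). Below is a hypothetical re-implementation of the `get_seq_len` helper, written only to illustrate what it presumably does; the real helper lives in `spandrel.util`:

```python
# Illustration only: a plausible get_seq_len, assumed behaviour.
# Counts the nn.Sequential entries "prefix.0", "prefix.1", ... present in the
# state dict, e.g. get_seq_len(sd, "encoder_level1") -> num_blocks[0].
def get_seq_len(state_dict: dict, seq_key: str) -> int:
    indices: set[int] = set()
    for key in state_dict:
        if key.startswith(seq_key + "."):
            first = key[len(seq_key) + 1 :].split(".", 1)[0]
            if first.isdigit():
                indices.add(int(first))
    return max(indices) + 1 if indices else 0
```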
21 changes: 21 additions & 0 deletions src/spandrel/architectures/FFTformer/arch/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 kkkls

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.