brycedrennan · brycedrennan · Jan 16, 2024 · Jan 20, 2024 · Jan 20, 2024
diff --git a/imaginairy/api/generate_compvis.py b/imaginairy/api/generate_compvis.py
@@ -30,7 +30,7 @@ def _generate_single_image(
     from imaginairy.enhancers.clip_masking import get_img_mask
     from imaginairy.enhancers.describe_image_blip import generate_caption
     from imaginairy.enhancers.face_restoration_codeformer import enhance_faces
-    from imaginairy.enhancers.upscale_realesrgan import upscale_image
+    from imaginairy.enhancers.upscalers.realesrgan import upscale_image
     from imaginairy.modules.midas.api import torch_image_to_depth_map
     from imaginairy.samplers import SOLVER_LOOKUP
     from imaginairy.samplers.editing import CFGEditingDenoiser
@@ -534,7 +534,7 @@ def _generate_composition_image(
     result = _generate_single_image(composition_prompt, dtype=dtype)
     img = result.images["generated"]
     while img.width < target_width:
-        from imaginairy.enhancers.upscale_realesrgan import upscale_image
+        from imaginairy.enhancers.upscalers.realesrgan import upscale_image
 
         img = upscale_image(img)
 

diff --git a/imaginairy/api/generate_refiners.py b/imaginairy/api/generate_refiners.py
@@ -35,7 +35,7 @@ def generate_single_image(
     from imaginairy.enhancers.clip_masking import get_img_mask
     from imaginairy.enhancers.describe_image_blip import generate_caption
     from imaginairy.enhancers.face_restoration_codeformer import enhance_faces
-    from imaginairy.enhancers.upscale_realesrgan import upscale_image
+    from imaginairy.enhancers.upscalers.realesrgan import upscale_image
     from imaginairy.samplers import SolverName
     from imaginairy.schema import ImagineResult
     from imaginairy.utils import get_device, randn_seeded
@@ -587,7 +587,7 @@ def _generate_composition_image(
     )
     img = result.images["generated"]
     while img.width < target_width:
-        from imaginairy.enhancers.upscale_realesrgan import upscale_image
+        from imaginairy.enhancers.upscalers.realesrgan import upscale_image
 
         if prompt.fix_faces:
             from imaginairy.enhancers.face_restoration_codeformer import enhance_faces

diff --git a/imaginairy/cli/upscale.py b/imaginairy/cli/upscale.py
@@ -32,7 +32,7 @@ def upscale_cmd(image_filepaths, outdir, fix_faces, fix_faces_fidelity):
     from tqdm import tqdm
 
     from imaginairy.enhancers.face_restoration_codeformer import enhance_faces
-    from imaginairy.enhancers.upscale_realesrgan import upscale_image
+    from imaginairy.enhancers.upscalers.realesrgan import upscale_image
     from imaginairy.schema import LazyLoadingImage
     from imaginairy.utils import glob_expand_paths
 

diff --git a/imaginairy/enhancers/upscalers/__init__.py b/imaginairy/enhancers/upscalers/__init__.py
diff --git a/imaginairy/enhancers/upscalers/city96.py b/imaginairy/enhancers/upscalers/city96.py
@@ -0,0 +1,81 @@
+from typing import Literal
+
+import torch
+import torch.nn as nn
+from safetensors.torch import load_file
+
+from imaginairy.utils.downloads import get_cached_url_path
+
+LatentVerType = Literal["v1", "xl"]
+ScaleFactorType = Literal["1.25", "1.5", "2.0"]
+
+
+class Upscaler(nn.Module):
+    """
+    Convolutional latent upscaler. Basic NN layout, ported from:
+    https://github.com/city96/SD-Latent-Upscaler/blob/main/upscaler.py
+
+    All layers live in one ``nn.Sequential`` assembled from three stages:
+    ``head`` (conv, ReLU, nearest-neighbour upsample, ReLU), ``core``
+    (``depth`` conv/ReLU pairs) and ``tail`` (one conv back to ``chan``
+    channels).
+    """
+
+    version = 2.1  # network revision
+
+    def __init__(self, fac, depth=16):
+        """
+        :param fac: scale factor passed to ``nn.Upsample``
+        :param depth: number of conv/ReLU pairs built by ``core``
+        """
+        super().__init__()
+        self.size = 64  # Conv2d size
+        self.chan = 4  # in/out channels
+        self.depth = depth  # no. of layers
+        self.fac = fac  # scale factor
+        self.krn = 3  # kernel size
+        self.pad = 1  # padding
+
+        # Built strictly in head -> core -> tail order: the position of each
+        # layer inside ``sequential`` determines its state-dict key.
+        stages = (self.head(), self.core(), self.tail())
+        self.sequential = nn.Sequential(
+            *(layer for stage in stages for layer in stage)
+        )
+
+    def _conv(self, in_channels, out_channels):
+        """Return a conv layer using the shared kernel size and padding."""
+        return nn.Conv2d(
+            in_channels, out_channels, kernel_size=self.krn, padding=self.pad
+        )
+
+    def head(self):
+        """Return the input stage: conv, ReLU, nearest upsample, ReLU."""
+        widen = self._conv(self.chan, self.size)
+        enlarge = nn.Upsample(scale_factor=self.fac, mode="nearest")
+        return [widen, nn.ReLU(), enlarge, nn.ReLU()]
+
+    def core(self):
+        """Return ``depth`` repetitions of a width-preserving conv and a ReLU."""
+        return [
+            layer
+            for _ in range(self.depth)
+            for layer in (self._conv(self.size, self.size), nn.ReLU())
+        ]
+
+    def tail(self):
+        """Return the output stage: one conv back to ``chan`` channels."""
+        return [self._conv(self.size, self.chan)]
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Run ``x`` through every layer of ``sequential`` in order."""
+        return self.sequential(x)
+
+
+def upscale_latent(
+    latent: torch.Tensor, latent_ver: LatentVerType, scale_factor: ScaleFactorType
+) -> torch.Tensor:
+    """
+    Upscale a latent tensor with the city96 SD-Latent-Upscaler network.
+
+    The pretrained weights matching ``latent_ver`` and ``scale_factor`` are
+    resolved through ``get_cached_url_path``. The network runs on the CPU in
+    float32 and the result is moved back to the dtype and device of the input.
+
+    :param latent: latent tensor to enlarge
+    :param latent_ver: latent family of the weights to load ("v1" or "xl")
+    :param scale_factor: enlargement factor; a string because it is also
+        embedded verbatim in the weights filename
+    :return: the upscaled latent, with the dtype and device of ``latent``
+    """
+    # NOTE(review): the same factor is applied to "v1" and "xl" latents; it
+    # looks like the SDXL latent scale -- confirm it is also right for "v1".
+    latent_scale = 0.13025
+    orig_dtype, orig_device = latent.dtype, latent.device
+
+    # Convert explicitly rather than relying on nn.Upsample coercing a string.
+    model = Upscaler(float(scale_factor))
+    filename = (
+        f"latent-upscaler-v{model.version}_SD{latent_ver}-x{scale_factor}.safetensors"
+    )
+    weights_url = f"https://huggingface.co/city96/SD-Latent-Upscaler/resolve/99c65021fa947dfe3d71ec4e24793fe7533a3322/{filename}"
+    weights_path = get_cached_url_path(weights_url)
+    model.load_state_dict(load_file(weights_path), assign=True)
+    model.eval()
+
+    latent = latent.to(dtype=torch.float32, device="cpu") / latent_scale
+    # Inference only: disable autograd so no graph is built or kept alive.
+    with torch.no_grad():
+        big_latent = model(latent)
+    big_latent = big_latent.to(dtype=orig_dtype, device=orig_device)
+    return big_latent * latent_scale
diff --git a/imaginairy/enhancers/upscale_realesrgan.py → imaginairy/enhancers/upscalers/realesrgan.py b/imaginairy/enhancers/upscale_realesrgan.py → imaginairy/enhancers/upscalers/realesrgan.py
@@ -12,7 +12,7 @@
 
 
 @memory_managed_model("realesrgan_upsampler", memory_usage_mb=70)
-def realesrgan_upsampler(tile=512, tile_pad=50, ultrasharp=False):
+def realesrgan_upsampler(tile=512, tile_pad=50, ultrasharp=False, weights_url=None):
     model = RRDBNet(
         num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4
     )
@@ -21,6 +21,9 @@ def realesrgan_upsampler(tile=512, tile_pad=50, ultrasharp=False):
     else:
         url = "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth"
 
+    if weights_url:
+        url = weights_url
+
     model_path = get_cached_url_path(url)
     device = get_device()
 
@@ -40,11 +43,11 @@ def realesrgan_upsampler(tile=512, tile_pad=50, ultrasharp=False):
     return upsampler
 
 
-def upscale_image(img, ultrasharp=False):
+def upscale_image(img, ultrasharp=False, weights_url=None):
     img = img.convert("RGB")
 
     np_img = np.array(img, dtype=np.uint8)
-    upsampler_output, img_mode = realesrgan_upsampler(ultrasharp=ultrasharp).enhance(
-        np_img[:, :, ::-1]
-    )
+    upsampler_output, img_mode = realesrgan_upsampler(
+        ultrasharp=ultrasharp, weights_url=weights_url
+    ).enhance(np_img[:, :, ::-1])
     return Image.fromarray(upsampler_output[:, :, ::-1], mode=img_mode)
diff --git a/imaginairy/enhancers/upscale_riverwing.py → imaginairy/enhancers/upscalers/riverwing.py b/imaginairy/enhancers/upscale_riverwing.py → imaginairy/enhancers/upscalers/riverwing.py
diff --git a/imaginairy/utils/model_manager.py b/imaginairy/utils/model_manager.py
@@ -603,8 +603,8 @@ def load_sdxl_pipeline_from_diffusers_weights(
     text_encoder.load_state_dict(text_encoder_weights, assign=True)
     del text_encoder_weights
     lda = lda.to(device=device, dtype=torch.float32)
-    unet = unet.to(device=device)
-    text_encoder = text_encoder.to(device=device)
+    unet = unet.to(device=device, dtype=dtype)
+    text_encoder = text_encoder.to(device=device, dtype=dtype)
     if for_inpainting:
         StableDiffusionCls = StableDiffusion_XL_Inpainting
     else:

diff --git a/tests/data/upscale_challenges/freckles.jpg b/tests/data/upscale_challenges/freckles.jpg
diff --git a/tests/data/sand_upscale_difficult.jpg → tests/data/upscale_challenges/sand.jpg b/tests/data/sand_upscale_difficult.jpg → tests/data/upscale_challenges/sand.jpg
diff --git a/tests/test_cli/test_cmds.py b/tests/test_cli/test_cmds.py
@@ -144,12 +144,12 @@ def mock_surprise_me_prompts(*args, **kwargs):
 
 
 def test_upscale(monkeypatch):
-    from imaginairy.enhancers import upscale_realesrgan
+    from imaginairy.enhancers.upscalers import realesrgan
 
     def mock_upscale_image(*args, **kwargs):
         return LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/dog.jpg")
 
-    monkeypatch.setattr(upscale_realesrgan, "upscale_image", mock_upscale_image)
+    monkeypatch.setattr(realesrgan, "upscale_image", mock_upscale_image)
     runner = CliRunner()
     result = runner.invoke(
         upscale_cmd,

diff --git a/tests/test_enhancers/test_upscale_realesrgan.py b/tests/test_enhancers/test_upscale_realesrgan.py
diff --git a/tests/test_enhancers/test_upscalers/__init__.py b/tests/test_enhancers/test_upscalers/__init__.py
diff --git a/tests/test_enhancers/test_upscalers/test_upscale_realesrgan.py b/tests/test_enhancers/test_upscalers/test_upscale_realesrgan.py
@@ -0,0 +1,47 @@
+import os
+
+import pytest
+from PIL import Image
+
+from imaginairy.enhancers.upscalers.realesrgan import upscale_image
+from tests import TESTS_FOLDER
+from tests.utils import assert_image_similar_to_expectation
+
+upscale_challenges_folder = f"{TESTS_FOLDER}/data/upscale_challenges"
+def test_upscale_textured_image(filename_base_for_outputs):
+    """Upscale the sand texture with ``ultrasharp=True`` and check the result
+    with ``assert_image_similar_to_expectation``."""
+    source_path = f"{upscale_challenges_folder}/sand.jpg"
+    expected_path = f"{filename_base_for_outputs}.jpg"
+
+    result = upscale_image(Image.open(source_path), ultrasharp=True)
+
+    assert_image_similar_to_expectation(result, expected_path, threshold=25000)
+
+
+@pytest.mark.skip(
+    reason="manual comparison run: fetches every candidate weights file and only saves outputs"
+)
+def test_upscalers_difficult_images(filename_base_for_outputs):
+    """
+    Upscale every ``.jpg`` in the challenge folder with each candidate weights
+    file and save the results for side-by-side inspection.
+
+    Nothing is asserted; one output image is written per (image, weights) pair.
+    """
+    weight_urls = [
+        # "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",  # blurry on sand
+        "https://huggingface.co/lokCX/4x-Ultrasharp/resolve/1856559b50de25116a7c07261177dd128f1f5664/4x-UltraSharp.pth",
+        "https://github.com/Phhofm/models/raw/main/4xLSDIRplus/4xLSDIRplusC.pth",
+        "https://github.com/Phhofm/models/raw/main/4xLSDIRplus/4xLSDIRplusN.pth",
+        # "https://github.com/Phhofm/models/raw/main/4xLSDIRplus/4xLSDIRplusR.pth",  # blurry on sand
+        "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x_RealisticRescaler_100000_G.pth",
+        "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/UniversalUpscaler/4x_UniversalUpscalerV2-Neutral_115000_swaG.pth",
+        "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/UniversalUpscaler/4x_UniversalUpscalerV2-Sharper_103000_G.pth",
+        "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/UniversalUpscaler/4x_UniversalUpscalerV2-Sharp_101000_G.pth",
+        "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x_foolhardy_Remacri.pth",
+        "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x_Valar_v1.pth",
+        "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x_NMKDSuperscale_Artisoft_120000_G.pth",
+        "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x_NMKD-Superscale-SP_178000_G.pth",
+        "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4xPSNR.pth",
+    ]
+    # Sorted so the processing order does not depend on the filesystem.
+    for img_filename in sorted(os.listdir(upscale_challenges_folder)):
+        if not img_filename.endswith(".jpg"):
+            continue
+        # splitext keeps any dots inside the stem (split(".")[0] would cut it).
+        img_name = os.path.splitext(img_filename)[0]
+        img = Image.open(f"{upscale_challenges_folder}/{img_filename}")
+        for url in weight_urls:
+            weights_filename = url.split("/")[-1]
+            upscaled_image = upscale_image(img, weights_url=url)
+            upscaled_image.save(
+                f"{filename_base_for_outputs}_{img_name}_{weights_filename}.jpg"
+            )
diff --git a/tests/test_modules/test_autoencoders.py b/tests/test_modules/test_autoencoders.py
@@ -3,7 +3,7 @@
 from PIL import Image
 from torch.nn.functional import interpolate
 
-from imaginairy.enhancers.upscale_riverwing import upscale_latent
+from imaginairy.enhancers.upscalers.riverwing import upscale_latent
 from imaginairy.schema import LazyLoadingImage
 from imaginairy.utils import get_device
 from imaginairy.utils.img_utils import (