Merge branch 'dev' into master

AUTOMATIC1111 · Mar 6, 2024 · ea3aae9 · ea3aae9
2 parents db4632f + 8904e00
commit ea3aae9
Show file tree

Hide file tree

Showing 167 changed files with 6,293 additions and 5,809 deletions.
diff --git a/.eslintrc.js b/.eslintrc.js
@@ -78,6 +78,8 @@ module.exports = {
         //extraNetworks.js
         requestGet: "readonly",
         popup: "readonly",
+        // profilerVisualization.js
+        createVisualizationTable: "readonly",
         // from python
         localization: "readonly",
         // progressbar.js
@@ -86,8 +88,6 @@ module.exports = {
         // imageviewer.js
         modalPrevImage: "readonly",
         modalNextImage: "readonly",
-        // token-counters.js
-        setupTokenCounters: "readonly",
         // localStorage.js
         localSet: "readonly",
         localGet: "readonly",

diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml
@@ -20,6 +20,12 @@ jobs:
           cache-dependency-path: |
             **/requirements*txt
             launch.py
+      - name: Cache models
+        id: cache-models
+        uses: actions/cache@v3
+        with:
+          path: models
+          key: "2023-12-30"
       - name: Install test dependencies
         run: pip install wait-for-it -r requirements-test.txt
         env:
@@ -33,6 +39,8 @@ jobs:
           TORCH_INDEX_URL: https://download.pytorch.org/whl/cpu
           WEBUI_LAUNCH_LIVE_OUTPUT: "1"
           PYTHONUNBUFFERED: "1"
+      - name: Print installed packages
+        run: pip freeze
       - name: Start test server
         run: >
           python -m coverage run
@@ -49,7 +57,7 @@ jobs:
           2>&1 | tee output.txt &
       - name: Run tests
         run: |
-          wait-for-it --service 127.0.0.1:7860 -t 600
+          wait-for-it --service 127.0.0.1:7860 -t 20
           python -m pytest -vv --junitxml=test/results.xml --cov . --cov-report=xml --verify-base-url test
       - name: Kill test server
         if: always()

diff --git a/.gitignore b/.gitignore
@@ -37,3 +37,4 @@ notification.mp3
 /node_modules
 /package-lock.json
 /.coverage*
+/test/test_outputs
diff --git a/CHANGELOG.md b/CHANGELOG.md
diff --git a/README.md b/README.md
@@ -1,5 +1,5 @@
 # Stable Diffusion web UI
-A browser interface based on Gradio library for Stable Diffusion.
+A web interface for Stable Diffusion, implemented using Gradio library.
 
 ![](screenshot.png)
 
@@ -151,11 +151,12 @@ Licenses for borrowed code can be found in `Settings -> Licenses` screen, and al
 
 - Stable Diffusion - https://github.com/Stability-AI/stablediffusion, https://github.com/CompVis/taming-transformers
 - k-diffusion - https://github.com/crowsonkb/k-diffusion.git
-- GFPGAN - https://github.com/TencentARC/GFPGAN.git
-- CodeFormer - https://github.com/sczhou/CodeFormer
-- ESRGAN - https://github.com/xinntao/ESRGAN
-- SwinIR - https://github.com/JingyunLiang/SwinIR
-- Swin2SR - https://github.com/mv-lab/swin2sr
+- Spandrel - https://github.com/chaiNNer-org/spandrel implementing
+  - GFPGAN - https://github.com/TencentARC/GFPGAN.git
+  - CodeFormer - https://github.com/sczhou/CodeFormer
+  - ESRGAN - https://github.com/xinntao/ESRGAN
+  - SwinIR - https://github.com/JingyunLiang/SwinIR
+  - Swin2SR - https://github.com/mv-lab/swin2sr
 - LDSR - https://github.com/Hafiidz/latent-diffusion
 - MiDaS - https://github.com/isl-org/MiDaS
 - Ideas for optimizations - https://github.com/basujindal/stable-diffusion

diff --git a/_typos.toml b/_typos.toml
@@ -0,0 +1,5 @@
+[default.extend-words]
+# Part of "RGBa" (Pillow's pre-multiplied alpha RGB mode)
+Ba = "Ba"
+# HSA is something AMD uses for their GPUs
+HSA = "HSA"
diff --git a/configs/sd_xl_inpaint.yaml b/configs/sd_xl_inpaint.yaml
@@ -0,0 +1,98 @@
+model:
+  target: sgm.models.diffusion.DiffusionEngine
+  params:
+    scale_factor: 0.13025
+    disable_first_stage_autocast: True
+
+    denoiser_config:
+      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
+      params:
+        num_idx: 1000
+
+        weighting_config:
+          target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
+        scaling_config:
+          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
+        discretization_config:
+          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
+
+    network_config:
+      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        adm_in_channels: 2816
+        num_classes: sequential
+        use_checkpoint: True
+        in_channels: 9
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [4, 2]
+        num_res_blocks: 2
+        channel_mult: [1, 2, 4]
+        num_head_channels: 64
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: [1, 2, 10]  # note: the first is unused (due to attn_res starting at 2) 32, 16, 8 --> 64, 32, 16
+        context_dim: 2048
+        spatial_transformer_attn_type: softmax-xformers
+        legacy: False
+
+    conditioner_config:
+      target: sgm.modules.GeneralConditioner
+      params:
+        emb_models:
+          # crossattn cond
+          - is_trainable: False
+            input_key: txt
+            target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
+            params:
+              layer: hidden
+              layer_idx: 11
+          # crossattn and vector cond
+          - is_trainable: False
+            input_key: txt
+            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
+            params:
+              arch: ViT-bigG-14
+              version: laion2b_s39b_b160k
+              freeze: True
+              layer: penultimate
+              always_return_pooled: True
+              legacy: False
+          # vector cond
+          - is_trainable: False
+            input_key: original_size_as_tuple
+            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
+            params:
+              outdim: 256  # multiplied by two
+          # vector cond
+          - is_trainable: False
+            input_key: crop_coords_top_left
+            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
+            params:
+              outdim: 256  # multiplied by two
+          # vector cond
+          - is_trainable: False
+            input_key: target_size_as_tuple
+            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
+            params:
+              outdim: 256  # multiplied by two
+
+    first_stage_config:
+      target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          attn_type: vanilla-xformers
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult: [1, 2, 4, 4]
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
diff --git a/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py b/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py
@@ -301,7 +301,7 @@ def p_losses(self, x_start, t, noise=None):
         elif self.parameterization == "x0":
             target = x_start
         else:
-            raise NotImplementedError(f"Paramterization {self.parameterization} not yet supported")
+            raise NotImplementedError(f"Parameterization {self.parameterization} not yet supported")
 
         loss = self.get_loss(model_out, target, mean=False).mean(dim=[1, 2, 3])
 
@@ -880,7 +880,7 @@ def forward(self, x, c, *args, **kwargs):
     def apply_model(self, x_noisy, t, cond, return_ids=False):
 
         if isinstance(cond, dict):
-            # hybrid case, cond is exptected to be a dict
+            # hybrid case, cond is expected to be a dict
             pass
         else:
             if not isinstance(cond, list):
@@ -916,7 +916,7 @@ def apply_model(self, x_noisy, t, cond, return_ids=False):
                 cond_list = [{c_key: [c[:, :, :, :, i]]} for i in range(c.shape[-1])]
 
             elif self.cond_stage_key == 'coordinates_bbox':
-                assert 'original_image_size' in self.split_input_params, 'BoudingBoxRescaling is missing original_image_size'
+                assert 'original_image_size' in self.split_input_params, 'BoundingBoxRescaling is missing original_image_size'
 
                 # assuming padding of unfold is always 0 and its dilation is always 1
                 n_patches_per_row = int((w - ks[0]) / stride[0] + 1)
@@ -926,7 +926,7 @@ def apply_model(self, x_noisy, t, cond, return_ids=False):
                 num_downs = self.first_stage_model.encoder.num_resolutions - 1
                 rescale_latent = 2 ** (num_downs)
 
-                # get top left postions of patches as conforming for the bbbox tokenizer, therefore we
+                # get top left positions of patches as conforming for the bbox tokenizer, therefore we
                 # need to rescale the tl patch coordinates to be in between (0,1)
                 tl_patch_coordinates = [(rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w,
                                          rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h)

diff --git a/extensions-builtin/Lora/lyco_helpers.py b/extensions-builtin/Lora/lyco_helpers.py
@@ -30,7 +30,7 @@ def factorization(dimension: int, factor:int=-1) -> tuple[int, int]:
     In LoRA with Kronecker Product, first value is a value for weight scale.
     second value is a value for weight.
 
-    Becuase of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different.
+    Because of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different.
 
     examples)
     factor

diff --git a/extensions-builtin/Lora/network.py b/extensions-builtin/Lora/network.py
@@ -3,6 +3,9 @@
 from collections import namedtuple
 import enum
 
+import torch.nn as nn
+import torch.nn.functional as F
+
 from modules import sd_models, cache, errors, hashes, shared
 
 NetworkWeights = namedtuple('NetworkWeights', ['network_key', 'sd_key', 'w', 'sd_module'])
@@ -115,6 +118,29 @@ def __init__(self, net: Network, weights: NetworkWeights):
         if hasattr(self.sd_module, 'weight'):
             self.shape = self.sd_module.weight.shape
 
+        self.ops = None
+        self.extra_kwargs = {}
+        if isinstance(self.sd_module, nn.Conv2d):
+            self.ops = F.conv2d
+            self.extra_kwargs = {
+                'stride': self.sd_module.stride,
+                'padding': self.sd_module.padding
+            }
+        elif isinstance(self.sd_module, nn.Linear):
+            self.ops = F.linear
+        elif isinstance(self.sd_module, nn.LayerNorm):
+            self.ops = F.layer_norm
+            self.extra_kwargs = {
+                'normalized_shape': self.sd_module.normalized_shape,
+                'eps': self.sd_module.eps
+            }
+        elif isinstance(self.sd_module, nn.GroupNorm):
+            self.ops = F.group_norm
+            self.extra_kwargs = {
+                'num_groups': self.sd_module.num_groups,
+                'eps': self.sd_module.eps
+            }
+
         self.dim = None
         self.bias = weights.w.get("bias")
         self.alpha = weights.w["alpha"].item() if "alpha" in weights.w else None
@@ -137,7 +163,7 @@ def calc_scale(self):
     def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
         if self.bias is not None:
             updown = updown.reshape(self.bias.shape)
-            updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype)
+            updown += self.bias.to(orig_weight.device, dtype=updown.dtype)
             updown = updown.reshape(output_shape)
 
         if len(output_shape) == 4:
@@ -155,5 +181,10 @@ def calc_updown(self, target):
         raise NotImplementedError()
 
     def forward(self, x, y):
-        raise NotImplementedError()
+        """A general forward implementation for all modules"""
+        if self.ops is None:
+            raise NotImplementedError()
+        else:
+            updown, ex_bias = self.calc_updown(self.sd_module.weight)
+            return y + self.ops(x, weight=updown, bias=ex_bias, **self.extra_kwargs)
 
diff --git a/extensions-builtin/Lora/network_full.py b/extensions-builtin/Lora/network_full.py
@@ -18,9 +18,9 @@ def __init__(self,  net: network.Network, weights: network.NetworkWeights):
 
     def calc_updown(self, orig_weight):
         output_shape = self.weight.shape
-        updown = self.weight.to(orig_weight.device, dtype=orig_weight.dtype)
+        updown = self.weight.to(orig_weight.device)
         if self.ex_bias is not None:
-            ex_bias = self.ex_bias.to(orig_weight.device, dtype=orig_weight.dtype)
+            ex_bias = self.ex_bias.to(orig_weight.device)
         else:
             ex_bias = None
 

diff --git a/extensions-builtin/Lora/network_glora.py b/extensions-builtin/Lora/network_glora.py
@@ -22,12 +22,12 @@ def __init__(self,  net: network.Network, weights: network.NetworkWeights):
         self.w2b = weights.w["b2.weight"]
 
     def calc_updown(self, orig_weight):
-        w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
-        w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
-        w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
-        w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+        w1a = self.w1a.to(orig_weight.device)
+        w1b = self.w1b.to(orig_weight.device)
+        w2a = self.w2a.to(orig_weight.device)
+        w2b = self.w2b.to(orig_weight.device)
 
         output_shape = [w1a.size(0), w1b.size(1)]
-        updown = ((w2b @ w1b) + ((orig_weight @ w2a) @ w1a))
+        updown = ((w2b @ w1b) + ((orig_weight.to(dtype = w1a.dtype) @ w2a) @ w1a))
 
         return self.finalize_updown(updown, orig_weight, output_shape)
diff --git a/extensions-builtin/Lora/network_hada.py b/extensions-builtin/Lora/network_hada.py
@@ -27,16 +27,16 @@ def __init__(self,  net: network.Network, weights: network.NetworkWeights):
         self.t2 = weights.w.get("hada_t2")
 
     def calc_updown(self, orig_weight):
-        w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
-        w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
-        w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
-        w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+        w1a = self.w1a.to(orig_weight.device)
+        w1b = self.w1b.to(orig_weight.device)
+        w2a = self.w2a.to(orig_weight.device)
+        w2b = self.w2b.to(orig_weight.device)
 
         output_shape = [w1a.size(0), w1b.size(1)]
 
         if self.t1 is not None:
             output_shape = [w1a.size(1), w1b.size(1)]
-            t1 = self.t1.to(orig_weight.device, dtype=orig_weight.dtype)
+            t1 = self.t1.to(orig_weight.device)
             updown1 = lyco_helpers.make_weight_cp(t1, w1a, w1b)
             output_shape += t1.shape[2:]
         else:
@@ -45,7 +45,7 @@ def calc_updown(self, orig_weight):
             updown1 = lyco_helpers.rebuild_conventional(w1a, w1b, output_shape)
 
         if self.t2 is not None:
-            t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype)
+            t2 = self.t2.to(orig_weight.device)
             updown2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
         else:
             updown2 = lyco_helpers.rebuild_conventional(w2a, w2b, output_shape)

diff --git a/extensions-builtin/Lora/network_ia3.py b/extensions-builtin/Lora/network_ia3.py
@@ -17,7 +17,7 @@ def __init__(self,  net: network.Network, weights: network.NetworkWeights):
         self.on_input = weights.w["on_input"].item()
 
     def calc_updown(self, orig_weight):
-        w = self.w.to(orig_weight.device, dtype=orig_weight.dtype)
+        w = self.w.to(orig_weight.device)
 
         output_shape = [w.size(0), orig_weight.size(1)]
         if self.on_input:

diff --git a/extensions-builtin/Lora/network_lokr.py b/extensions-builtin/Lora/network_lokr.py
@@ -37,22 +37,22 @@ def __init__(self,  net: network.Network, weights: network.NetworkWeights):
 
     def calc_updown(self, orig_weight):
         if self.w1 is not None:
-            w1 = self.w1.to(orig_weight.device, dtype=orig_weight.dtype)
+            w1 = self.w1.to(orig_weight.device)
         else:
-            w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
-            w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
+            w1a = self.w1a.to(orig_weight.device)
+            w1b = self.w1b.to(orig_weight.device)
             w1 = w1a @ w1b
 
         if self.w2 is not None:
-            w2 = self.w2.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2 = self.w2.to(orig_weight.device)
         elif self.t2 is None:
-            w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
-            w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2a = self.w2a.to(orig_weight.device)
+            w2b = self.w2b.to(orig_weight.device)
             w2 = w2a @ w2b
         else:
-            t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype)
-            w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
-            w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+            t2 = self.t2.to(orig_weight.device)
+            w2a = self.w2a.to(orig_weight.device)
+            w2b = self.w2b.to(orig_weight.device)
             w2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
 
         output_shape = [w1.size(0) * w2.size(0), w1.size(1) * w2.size(1)]