From 5dfe2589926594c5f64d51327dce073090d81e32 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 21 Mar 2024 18:17:53 +1100 Subject: [PATCH 01/11] feat(nodes): add missing detect_resolution to processors Some processors, like Canny, didn't use `detect_resolution`. The resultant control images were then resized by the processors from 512x512 to the desired dimensions. The result is that the control images are the right size, but very low quality. Using detect_resolution fixes this. --- .../controlnet_image_processors.py | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index 8db1f098665..ed563448476 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -171,11 +171,12 @@ def invoke(self, context: InvocationContext) -> ImageOutput: title="Canny Processor", tags=["controlnet", "canny"], category="controlnet", - version="1.3.1", + version="1.3.2", ) class CannyImageProcessorInvocation(ImageProcessorInvocation): """Canny edge detection for ControlNet""" + detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res) image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res) low_threshold: int = InputField( default=100, ge=0, le=255, description="The low threshold of the Canny pixel gradient (0-255)" @@ -195,6 +196,7 @@ def run_processor(self, image): self.low_threshold, self.high_threshold, image_resolution=self.image_resolution, + detect_resolution=self.detect_resolution, ) return processed_image @@ -278,13 +280,14 @@ def run_processor(self, image): title="Midas Depth Processor", tags=["controlnet", "midas"], category="controlnet", - version="1.2.2", + version="1.2.3", ) class MidasDepthImageProcessorInvocation(ImageProcessorInvocation): """Applies Midas depth processing to image""" a_mult: float = InputField(default=2.0, ge=0, description="Midas parameter `a_mult` (a = a_mult * PI)") bg_th: float = InputField(default=0.1, ge=0, description="Midas parameter `bg_th`") + detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res) image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res) # depth_and_normal not supported in controlnet_aux v0.0.3 # depth_and_normal: bool = InputField(default=False, description="whether to use depth and normal mode") @@ -296,6 +299,7 @@ def run_processor(self, image): a=np.pi * self.a_mult, bg_th=self.bg_th, image_resolution=self.image_resolution, + detect_resolution=self.detect_resolution, # dept_and_normal not supported in controlnet_aux v0.0.3 # depth_and_normal=self.depth_and_normal, ) @@ -420,19 +424,24 @@ def run_processor(self, image): title="Mediapipe Face Processor", tags=["controlnet", "mediapipe", "face"], category="controlnet", - version="1.2.2", + version="1.2.3", ) class MediapipeFaceProcessorInvocation(ImageProcessorInvocation): """Applies mediapipe face processing to image""" max_faces: int = InputField(default=1, ge=1, description="Maximum number of faces to detect") min_confidence: float = InputField(default=0.5, ge=0, le=1, description="Minimum confidence for face detection") + detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res) image_resolution: int = InputField(default=512, 
ge=0, description=FieldDescriptions.image_res) def run_processor(self, image): mediapipe_face_processor = MediapipeFaceDetector() processed_image = mediapipe_face_processor( - image, max_faces=self.max_faces, min_confidence=self.min_confidence, image_resolution=self.image_resolution + image, + max_faces=self.max_faces, + min_confidence=self.min_confidence, + image_resolution=self.image_resolution, + detect_resolution=self.detect_resolution, ) return processed_image @@ -511,11 +520,12 @@ def run_processor(self, img): title="Segment Anything Processor", tags=["controlnet", "segmentanything"], category="controlnet", - version="1.2.2", + version="1.2.3", ) class SegmentAnythingProcessorInvocation(ImageProcessorInvocation): """Applies segment anything processing to image""" + detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res) image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res) def run_processor(self, image): @@ -524,7 +534,9 @@ def run_processor(self, image): "ybelkada/segment-anything", subfolder="checkpoints" ) np_img = np.array(image, dtype=np.uint8) - processed_image = segment_anything_processor(np_img, image_resolution=self.image_resolution) + processed_image = segment_anything_processor( + np_img, image_resolution=self.image_resolution, detect_resolution=self.detect_resolution + ) return processed_image From cec027a5c14d0c6b459342105a4fdf7f7b3db076 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 21 Mar 2024 18:57:33 +1100 Subject: [PATCH 02/11] feat: add image utils These all support controlnet processors. - `pil_to_cv2` - `cv2_to_pil` - `pil_to_np` - `np_to_pil` - `normalize_image_channel_count` (a readable version of `HWC3` from the controlnet repo) - `fit_image_to_resolution` (a readable version of `resize_image` from the controlnet repo) - `non_maximum_suppression` (a readable version of `nms` from the controlnet repo) - `safe_step` (a readable version of `safe_step` from the controlnet repo) --- invokeai/backend/image_util/util.py | 133 ++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/invokeai/backend/image_util/util.py b/invokeai/backend/image_util/util.py index 5b8be7f1180..067adfa15e3 100644 --- a/invokeai/backend/image_util/util.py +++ b/invokeai/backend/image_util/util.py @@ -1,5 +1,7 @@ from math import ceil, floor, sqrt +import cv2 +import numpy as np from PIL import Image @@ -69,3 +71,134 @@ def make_grid(image_list, rows=None, cols=None): i = i + 1 return grid_img + + +def pil_to_np(image: Image.Image) -> np.ndarray: + """Converts a PIL image to a numpy array.""" + return np.array(image, dtype=np.uint8) + + +def np_to_pil(image: np.ndarray) -> Image.Image: + """Converts a numpy array to a PIL image.""" + return Image.fromarray(image) + + +def pil_to_cv2(image: Image.Image) -> np.ndarray: + """Converts a PIL image to a CV2 image.""" + return cv2.cvtColor(np.array(image, dtype=np.uint8), cv2.COLOR_RGB2BGR) + + +def cv2_to_pil(image: np.ndarray) -> Image.Image: + """Converts a CV2 image to a PIL image.""" + return Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + + +def normalize_image_channel_count(image: np.ndarray) -> np.ndarray: + """Normalizes an image to have 3 channels. + + If the image has 1 channel, it will be duplicated 3 times. + If the image has 1 channel, a third empty channel will be added. 
+ If the image has 4 channels, the alpha channel will be used to blend the image with a white background. + + This function is adapted from https://github.com/lllyasviel/ControlNet. + + Args: + image: The input image. + + Returns: + The normalized image. + """ + assert image.dtype == np.uint8 + if image.ndim == 2: + image = image[:, :, None] + assert image.ndim == 3 + _height, _width, channels = image.shape + assert channels == 1 or channels == 3 or channels == 4 + if channels == 3: + return image + if channels == 1: + return np.concatenate([image, image, image], axis=2) + if channels == 4: + color = image[:, :, 0:3].astype(np.float32) + alpha = image[:, :, 3:4].astype(np.float32) / 255.0 + normalized = color * alpha + 255.0 * (1.0 - alpha) + normalized = normalized.clip(0, 255).astype(np.uint8) + return normalized + + raise ValueError("Invalid number of channels.") + + +def fit_image_to_resolution(input_image: np.ndarray, resolution: int) -> np.ndarray: + """Resizes an image, fitting it to the given resolution. + + This function is adapted from https://github.com/lllyasviel/ControlNet. + + Args: + input_image: The input image. + resolution: The resolution to fit the image to. + + Returns: + The resized image. + """ + h = float(input_image.shape[0]) + w = float(input_image.shape[1]) + scaling_factor = float(resolution) / min(h, w) + h *= scaling_factor + w *= scaling_factor + h = int(np.round(h / 64.0)) * 64 + w = int(np.round(w / 64.0)) * 64 + if scaling_factor > 1: + return cv2.resize(input_image, (w, h), interpolation=cv2.INTER_LANCZOS4) + else: + return cv2.resize(input_image, (w, h), interpolation=cv2.INTER_AREA) + + +def non_maximum_suppression(image: np.ndarray, threshold: int, sigma: float): + """ + Apply non-maximum suppression to an image. + + This function is adapted from https://github.com/lllyasviel/ControlNet. + + Args: + image: The input image. + threshold: The threshold value for the suppression. Pixels with values greater than this will be set to 255. + sigma: The standard deviation for the Gaussian blur applied to the image. + + Returns: + The image after non-maximum suppression. + """ + + image = cv2.GaussianBlur(image.astype(np.float32), (0, 0), sigma) + + filter_1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8) + filter_2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8) + filter_3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8) + filter_4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8) + + y = np.zeros_like(image) + + for f in [filter_1, filter_2, filter_3, filter_4]: + np.putmask(y, cv2.dilate(image, kernel=f) == image, image) + + z = np.zeros_like(y, dtype=np.uint8) + z[y > threshold] = 255 + return z + + +def safe_step(x: np.ndarray, step: int = 2) -> np.ndarray: + """Apply the safe step operation to an array. + + I don't fully understand the purpose of this function, but it appears to be normalizing/quantizing the array. + + This function is adapted from https://github.com/lllyasviel/ControlNet. + + Args: + x: The input array. + step: The step value. + + Returns: + The array after the safe step operation. 
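        Example (worked through for the default step of 2): a non-negative value x is
        mapped to int(x * 3) / 2, so a soft edge map in the range [0, 1) is snapped to
        one of the levels 0.0, 0.5 or 1.0. This suggests the function quantizes the
        input to a small number of discrete levels before further processing.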
+ """ + y = x.astype(np.float32) * float(step + 1) + y = y.astype(np.int32).astype(np.float32) / float(step) + return y From de6807ad42ce8bfdc430b1fcf8add39c707a22e3 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 21 Mar 2024 18:58:05 +1100 Subject: [PATCH 03/11] feat: adaptation of Canny processor Adapted from controlnet processors package fix: do final resize in canny processor canny --- .../controlnet_image_processors.py | 9 ++-- invokeai/backend/image_util/canny.py | 41 +++++++++++++++++++ 2 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 invokeai/backend/image_util/canny.py diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index ed563448476..547b335a707 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -7,7 +7,6 @@ import cv2 import numpy as np from controlnet_aux import ( - CannyDetector, ContentShuffleDetector, HEDdetector, LeresDetector, @@ -39,6 +38,7 @@ from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.invocations.util import validate_begin_end_step, validate_weights from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.image_util.canny import get_canny_edges from invokeai.backend.image_util.depth_anything import DepthAnythingDetector from invokeai.backend.image_util.dw_openpose import DWOpenposeDetector @@ -189,14 +189,13 @@ def load_image(self, context: InvocationContext) -> Image.Image: # Keep alpha channel for Canny processing to detect edges of transparent areas return context.images.get_pil(self.image.image_name, "RGBA") - def run_processor(self, image): - canny_processor = CannyDetector() - processed_image = canny_processor( + def run_processor(self, image: Image.Image) -> Image.Image: + processed_image = get_canny_edges( image, self.low_threshold, self.high_threshold, - image_resolution=self.image_resolution, detect_resolution=self.detect_resolution, + image_resolution=self.image_resolution, ) return processed_image diff --git a/invokeai/backend/image_util/canny.py b/invokeai/backend/image_util/canny.py new file mode 100644 index 00000000000..e0067bd7fde --- /dev/null +++ b/invokeai/backend/image_util/canny.py @@ -0,0 +1,41 @@ +import cv2 +from PIL import Image + +from invokeai.backend.image_util.util import ( + cv2_to_pil, + fit_image_to_resolution, + normalize_image_channel_count, + pil_to_cv2, +) + + +def get_canny_edges( + image: Image.Image, low_threshold: int, high_threshold: int, detect_resolution: int, image_resolution: int +) -> Image.Image: + """Returns the edges of an image using the Canny edge detection algorithm. + + This function is adapted from https://github.com/lllyasviel/ControlNet. + + Args: + image: The input image. + low_threshold: The lower threshold for the hysteresis procedure. + high_threshold: The upper threshold for the hysteresis procedure. + input_resolution: The resolution of the input image. The image will be resized to this resolution before edge detection. + output_resolution: The resolution of the output image. The edges will be resized to this resolution before returning. + + Returns: + The Canny edges of the input image. 
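        Example (illustrative; `pil_image` stands for any PIL image, and the thresholds
        and resolutions mirror the node defaults):
            edges = get_canny_edges(pil_image, low_threshold=100, high_threshold=200,
                                    detect_resolution=512, image_resolution=512)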
+ """ + + if image.mode != "RGB": + image = image.convert("RGB") + + np_image = pil_to_cv2(image) + np_image = normalize_image_channel_count(np_image) + np_image = fit_image_to_resolution(np_image, detect_resolution) + + edge_map = cv2.Canny(np_image, low_threshold, high_threshold) + edge_map = normalize_image_channel_count(edge_map) + edge_map = fit_image_to_resolution(edge_map, image_resolution) + + return cv2_to_pil(edge_map) From 89cb9468637457baa0097430a28f28981a72e836 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 21 Mar 2024 19:39:42 +1100 Subject: [PATCH 04/11] feat: adaptation of HED processor Adapted from controlnet repo --- .../controlnet_image_processors.py | 8 +- invokeai/backend/image_util/hed.py | 142 ++++++++++++++++++ 2 files changed, 146 insertions(+), 4 deletions(-) create mode 100644 invokeai/backend/image_util/hed.py diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index 547b335a707..e09425d972e 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -8,7 +8,6 @@ import numpy as np from controlnet_aux import ( ContentShuffleDetector, - HEDdetector, LeresDetector, LineartAnimeDetector, LineartDetector, @@ -41,6 +40,7 @@ from invokeai.backend.image_util.canny import get_canny_edges from invokeai.backend.image_util.depth_anything import DepthAnythingDetector from invokeai.backend.image_util.dw_openpose import DWOpenposeDetector +from invokeai.backend.image_util.hed import HEDProcessor from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output @@ -216,9 +216,9 @@ class HedImageProcessorInvocation(ImageProcessorInvocation): # safe: bool = InputField(default=False, description=FieldDescriptions.safe_mode) scribble: bool = InputField(default=False, description=FieldDescriptions.scribble_mode) - def run_processor(self, image): - hed_processor = HEDdetector.from_pretrained("lllyasviel/Annotators") - processed_image = hed_processor( + def run_processor(self, image: Image.Image) -> Image.Image: + hed_processor = HEDProcessor() + processed_image = hed_processor.run( image, detect_resolution=self.detect_resolution, image_resolution=self.image_resolution, diff --git a/invokeai/backend/image_util/hed.py b/invokeai/backend/image_util/hed.py new file mode 100644 index 00000000000..36b07d43ed3 --- /dev/null +++ b/invokeai/backend/image_util/hed.py @@ -0,0 +1,142 @@ +"""Adapted from https://github.com/lllyasviel/ControlNet (Apache-2.0 license).""" + +import cv2 +import numpy as np +import torch +from einops import rearrange +from huggingface_hub import hf_hub_download +from PIL import Image + +from invokeai.backend.image_util.util import ( + fit_image_to_resolution, + non_maximum_suppression, + normalize_image_channel_count, + np_to_pil, + pil_to_np, + safe_step, +) + + +class DoubleConvBlock(torch.nn.Module): + def __init__(self, input_channel, output_channel, layer_number): + super().__init__() + self.convs = torch.nn.Sequential() + self.convs.append( + torch.nn.Conv2d( + in_channels=input_channel, out_channels=output_channel, kernel_size=(3, 3), stride=(1, 1), padding=1 + ) + ) + for i in range(1, layer_number): + self.convs.append( + torch.nn.Conv2d( + in_channels=output_channel, + out_channels=output_channel, + kernel_size=(3, 3), + stride=(1, 1), + padding=1, + ) + ) + self.projection = torch.nn.Conv2d( + 
in_channels=output_channel, out_channels=1, kernel_size=(1, 1), stride=(1, 1), padding=0 + ) + + def __call__(self, x, down_sampling=False): + h = x + if down_sampling: + h = torch.nn.functional.max_pool2d(h, kernel_size=(2, 2), stride=(2, 2)) + for conv in self.convs: + h = conv(h) + h = torch.nn.functional.relu(h) + return h, self.projection(h) + + +class ControlNetHED_Apache2(torch.nn.Module): + def __init__(self): + super().__init__() + self.norm = torch.nn.Parameter(torch.zeros(size=(1, 3, 1, 1))) + self.block1 = DoubleConvBlock(input_channel=3, output_channel=64, layer_number=2) + self.block2 = DoubleConvBlock(input_channel=64, output_channel=128, layer_number=2) + self.block3 = DoubleConvBlock(input_channel=128, output_channel=256, layer_number=3) + self.block4 = DoubleConvBlock(input_channel=256, output_channel=512, layer_number=3) + self.block5 = DoubleConvBlock(input_channel=512, output_channel=512, layer_number=3) + + def __call__(self, x): + h = x - self.norm + h, projection1 = self.block1(h) + h, projection2 = self.block2(h, down_sampling=True) + h, projection3 = self.block3(h, down_sampling=True) + h, projection4 = self.block4(h, down_sampling=True) + h, projection5 = self.block5(h, down_sampling=True) + return projection1, projection2, projection3, projection4, projection5 + + +class HEDProcessor: + """Holistically-Nested Edge Detection. + + On instantiation, loads the HED model from the HuggingFace Hub. + """ + + def __init__(self): + model_path = hf_hub_download("lllyasviel/Annotators", "ControlNetHED.pth") + self.network = ControlNetHED_Apache2() + self.network.load_state_dict(torch.load(model_path, map_location="cpu")) + self.network.float().eval() + + def to(self, device: torch.device): + self.network.to(device) + return self + + def run( + self, + input_image: Image.Image, + detect_resolution: int = 512, + image_resolution: int = 512, + safe: bool = False, + scribble: bool = False, + ) -> Image.Image: + """Processes an image and returns the detected edges. + + Args: + input_image: The input image. + detect_resolution: The resolution to fit the image to before edge detection. + image_resolution: The resolution to fit the edges to before returning. + safe: Whether to apply safe step to the detected edges. + scribble: Whether to apply non-maximum suppression and Gaussian blur to the detected edges. + + Returns: + The detected edges. 
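        Example (illustrative; `pil_image` stands for any PIL image, and moving the model
        to a CUDA device is an assumption rather than a requirement):
            processor = HEDProcessor().to(torch.device("cuda"))
            edges = processor.run(pil_image, detect_resolution=512, image_resolution=512, scribble=True)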
+ """ + device = next(iter(self.network.parameters())).device + np_image = pil_to_np(input_image) + np_image = normalize_image_channel_count(np_image) + np_image = fit_image_to_resolution(np_image, detect_resolution) + + assert np_image.ndim == 3 + height, width, _channels = np_image.shape + with torch.no_grad(): + image_hed = torch.from_numpy(np_image.copy()).float().to(device) + image_hed = rearrange(image_hed, "h w c -> 1 c h w") + edges = self.network(image_hed) + edges = [e.detach().cpu().numpy().astype(np.float32)[0, 0] for e in edges] + edges = [cv2.resize(e, (width, height), interpolation=cv2.INTER_LINEAR) for e in edges] + edges = np.stack(edges, axis=2) + edge = 1 / (1 + np.exp(-np.mean(edges, axis=2).astype(np.float64))) + if safe: + edge = safe_step(edge) + edge = (edge * 255.0).clip(0, 255).astype(np.uint8) + + detected_map = edge + detected_map = normalize_image_channel_count(detected_map) + + img = fit_image_to_resolution(np_image, image_resolution) + height, width, _channels = img.shape + + detected_map = cv2.resize(detected_map, (width, height), interpolation=cv2.INTER_LINEAR) + + if scribble: + detected_map = non_maximum_suppression(detected_map, 127, 3.0) + detected_map = cv2.GaussianBlur(detected_map, (0, 0), 3.0) + detected_map[detected_map > 4] = 255 + detected_map[detected_map < 255] = 0 + + return np_to_pil(detected_map) From bcbf41184e30821bbe1b1ea16db9beb509cdf255 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 21 Mar 2024 19:55:51 +1100 Subject: [PATCH 05/11] feat: adaptation of Lineart processor Adapted from https://github.com/huggingface/controlnet_aux --- .../controlnet_image_processors.py | 8 +- invokeai/backend/image_util/lineart.py | 158 ++++++++++++++++++ 2 files changed, 162 insertions(+), 4 deletions(-) create mode 100644 invokeai/backend/image_util/lineart.py diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index e09425d972e..2d09b54cc88 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -10,7 +10,6 @@ ContentShuffleDetector, LeresDetector, LineartAnimeDetector, - LineartDetector, MediapipeFaceDetector, MidasDetector, MLSDdetector, @@ -41,6 +40,7 @@ from invokeai.backend.image_util.depth_anything import DepthAnythingDetector from invokeai.backend.image_util.dw_openpose import DWOpenposeDetector from invokeai.backend.image_util.hed import HEDProcessor +from invokeai.backend.image_util.lineart import LineartProcessor from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output @@ -243,9 +243,9 @@ class LineartImageProcessorInvocation(ImageProcessorInvocation): image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res) coarse: bool = InputField(default=False, description="Whether to use coarse mode") - def run_processor(self, image): - lineart_processor = LineartDetector.from_pretrained("lllyasviel/Annotators") - processed_image = lineart_processor( + def run_processor(self, image: Image.Image) -> Image.Image: + lineart_processor = LineartProcessor() + processed_image = lineart_processor.run( image, detect_resolution=self.detect_resolution, image_resolution=self.image_resolution, coarse=self.coarse ) return processed_image diff --git a/invokeai/backend/image_util/lineart.py b/invokeai/backend/image_util/lineart.py new file mode 100644 index 00000000000..0a17add422d --- 
/dev/null +++ b/invokeai/backend/image_util/lineart.py @@ -0,0 +1,158 @@ +"""Adapted from https://github.com/huggingface/controlnet_aux (Apache-2.0 license).""" + +import cv2 +import numpy as np +import torch +import torch.nn as nn +from einops import rearrange +from huggingface_hub import hf_hub_download +from PIL import Image + +from invokeai.backend.image_util.util import ( + fit_image_to_resolution, + normalize_image_channel_count, + np_to_pil, + pil_to_np, +) + + +class ResidualBlock(nn.Module): + def __init__(self, in_features): + super(ResidualBlock, self).__init__() + + conv_block = [ + nn.ReflectionPad2d(1), + nn.Conv2d(in_features, in_features, 3), + nn.InstanceNorm2d(in_features), + nn.ReLU(inplace=True), + nn.ReflectionPad2d(1), + nn.Conv2d(in_features, in_features, 3), + nn.InstanceNorm2d(in_features), + ] + + self.conv_block = nn.Sequential(*conv_block) + + def forward(self, x): + return x + self.conv_block(x) + + +class Generator(nn.Module): + def __init__(self, input_nc, output_nc, n_residual_blocks=9, sigmoid=True): + super(Generator, self).__init__() + + # Initial convolution block + model0 = [nn.ReflectionPad2d(3), nn.Conv2d(input_nc, 64, 7), nn.InstanceNorm2d(64), nn.ReLU(inplace=True)] + self.model0 = nn.Sequential(*model0) + + # Downsampling + model1 = [] + in_features = 64 + out_features = in_features * 2 + for _ in range(2): + model1 += [ + nn.Conv2d(in_features, out_features, 3, stride=2, padding=1), + nn.InstanceNorm2d(out_features), + nn.ReLU(inplace=True), + ] + in_features = out_features + out_features = in_features * 2 + self.model1 = nn.Sequential(*model1) + + model2 = [] + # Residual blocks + for _ in range(n_residual_blocks): + model2 += [ResidualBlock(in_features)] + self.model2 = nn.Sequential(*model2) + + # Upsampling + model3 = [] + out_features = in_features // 2 + for _ in range(2): + model3 += [ + nn.ConvTranspose2d(in_features, out_features, 3, stride=2, padding=1, output_padding=1), + nn.InstanceNorm2d(out_features), + nn.ReLU(inplace=True), + ] + in_features = out_features + out_features = in_features // 2 + self.model3 = nn.Sequential(*model3) + + # Output layer + model4 = [nn.ReflectionPad2d(3), nn.Conv2d(64, output_nc, 7)] + if sigmoid: + model4 += [nn.Sigmoid()] + + self.model4 = nn.Sequential(*model4) + + def forward(self, x, cond=None): + out = self.model0(x) + out = self.model1(out) + out = self.model2(out) + out = self.model3(out) + out = self.model4(out) + + return out + + +class LineartProcessor: + """Processor for lineart detection.""" + + def __init__(self): + model_path = hf_hub_download("lllyasviel/Annotators", "sk_model.pth") + self.model = Generator(3, 1, 3) + self.model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu"))) + self.model.eval() + + coarse_model_path = hf_hub_download("lllyasviel/Annotators", "sk_model2.pth") + self.model_coarse = Generator(3, 1, 3) + self.model_coarse.load_state_dict(torch.load(coarse_model_path, map_location=torch.device("cpu"))) + self.model_coarse.eval() + + def to(self, device: torch.device): + self.model.to(device) + self.model_coarse.to(device) + return self + + def run( + self, input_image: Image.Image, coarse: bool = False, detect_resolution: int = 512, image_resolution: int = 512 + ) -> Image.Image: + """Processes an image to detect lineart. + + Args: + input_image: The input image. + coarse: Whether to use the coarse model. + detect_resolution: The resolution to fit the image to before edge detection. + image_resolution: The resolution of the output image. 
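        Example (illustrative; `pil_image` stands for any PIL image; passing `coarse=True`
        selects the secondary "sk_model2.pth" checkpoint loaded in the constructor):
            lineart = LineartProcessor().run(pil_image, coarse=True, detect_resolution=512, image_resolution=512)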
+ + Returns: + The detected lineart. + """ + device = next(iter(self.model.parameters())).device + + np_image = pil_to_np(input_image) + np_image = normalize_image_channel_count(np_image) + np_image = fit_image_to_resolution(np_image, detect_resolution) + + model = self.model_coarse if coarse else self.model + assert np_image.ndim == 3 + image = np_image + with torch.no_grad(): + image = torch.from_numpy(image).float().to(device) + image = image / 255.0 + image = rearrange(image, "h w c -> 1 c h w") + line = model(image)[0][0] + + line = line.cpu().numpy() + line = (line * 255.0).clip(0, 255).astype(np.uint8) + + detected_map = line + + detected_map = normalize_image_channel_count(detected_map) + + img = fit_image_to_resolution(np_image, image_resolution) + H, W, C = img.shape + + detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) + detected_map = 255 - detected_map + + return np_to_pil(detected_map) From 675451b548386af2e1e181859553a54a9af5b535 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 21 Mar 2024 20:06:14 +1100 Subject: [PATCH 06/11] feat: adaptation of Lineart Anime processor Adapted from https://github.com/huggingface/controlnet_aux --- .../controlnet_image_processors.py | 8 +- invokeai/backend/image_util/lineart_anime.py | 203 ++++++++++++++++++ 2 files changed, 207 insertions(+), 4 deletions(-) create mode 100644 invokeai/backend/image_util/lineart_anime.py diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index 2d09b54cc88..a49c910eeb1 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -9,7 +9,6 @@ from controlnet_aux import ( ContentShuffleDetector, LeresDetector, - LineartAnimeDetector, MediapipeFaceDetector, MidasDetector, MLSDdetector, @@ -41,6 +40,7 @@ from invokeai.backend.image_util.dw_openpose import DWOpenposeDetector from invokeai.backend.image_util.hed import HEDProcessor from invokeai.backend.image_util.lineart import LineartProcessor +from invokeai.backend.image_util.lineart_anime import LineartAnimeProcessor from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output @@ -264,9 +264,9 @@ class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation): detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res) image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res) - def run_processor(self, image): - processor = LineartAnimeDetector.from_pretrained("lllyasviel/Annotators") - processed_image = processor( + def run_processor(self, image: Image.Image) -> Image.Image: + processor = LineartAnimeProcessor() + processed_image = processor.run( image, detect_resolution=self.detect_resolution, image_resolution=self.image_resolution, diff --git a/invokeai/backend/image_util/lineart_anime.py b/invokeai/backend/image_util/lineart_anime.py new file mode 100644 index 00000000000..f547aac0d0a --- /dev/null +++ b/invokeai/backend/image_util/lineart_anime.py @@ -0,0 +1,203 @@ +"""Adapted from https://github.com/huggingface/controlnet_aux (Apache-2.0 license).""" + +import functools +from typing import Optional + +import cv2 +import numpy as np +import torch +import torch.nn as nn +from einops import rearrange +from huggingface_hub import hf_hub_download +from PIL import Image + +from invokeai.backend.image_util.util 
import ( + fit_image_to_resolution, + normalize_image_channel_count, + np_to_pil, + pil_to_np, +) + + +class UnetGenerator(nn.Module): + """Create a Unet-based generator""" + + def __init__( + self, + input_nc: int, + output_nc: int, + num_downs: int, + ngf: int = 64, + norm_layer=nn.BatchNorm2d, + use_dropout: bool = False, + ): + """Construct a Unet generator + Parameters: + input_nc (int) -- the number of channels in input images + output_nc (int) -- the number of channels in output images + num_downs (int) -- the number of downsamplings in UNet. For example, # if |num_downs| == 7, + image of size 128x128 will become of size 1x1 # at the bottleneck + ngf (int) -- the number of filters in the last conv layer + norm_layer -- normalization layer + We construct the U-Net from the innermost layer to the outermost layer. + It is a recursive process. + """ + super(UnetGenerator, self).__init__() + # construct unet structure + unet_block = UnetSkipConnectionBlock( + ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True + ) # add the innermost layer + for _ in range(num_downs - 5): # add intermediate layers with ngf * 8 filters + unet_block = UnetSkipConnectionBlock( + ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout + ) + # gradually reduce the number of filters from ngf * 8 to ngf + unet_block = UnetSkipConnectionBlock( + ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer + ) + unet_block = UnetSkipConnectionBlock( + ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer + ) + unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + self.model = UnetSkipConnectionBlock( + output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer + ) # add the outermost layer + + def forward(self, input): + """Standard forward""" + return self.model(input) + + +class UnetSkipConnectionBlock(nn.Module): + """Defines the Unet submodule with skip connection. + X -------------------identity---------------------- + |-- downsampling -- |submodule| -- upsampling --| + """ + + def __init__( + self, + outer_nc: int, + inner_nc: int, + input_nc: Optional[int] = None, + submodule=None, + outermost: bool = False, + innermost: bool = False, + norm_layer=nn.BatchNorm2d, + use_dropout: bool = False, + ): + """Construct a Unet submodule with skip connections. + Parameters: + outer_nc (int) -- the number of filters in the outer conv layer + inner_nc (int) -- the number of filters in the inner conv layer + input_nc (int) -- the number of channels in input images/features + submodule (UnetSkipConnectionBlock) -- previously defined submodules + outermost (bool) -- if this module is the outermost module + innermost (bool) -- if this module is the innermost module + norm_layer -- normalization layer + use_dropout (bool) -- if use dropout layers. 
+ """ + super(UnetSkipConnectionBlock, self).__init__() + self.outermost = outermost + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + if input_nc is None: + input_nc = outer_nc + downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4, stride=2, padding=1, bias=use_bias) + downrelu = nn.LeakyReLU(0.2, True) + downnorm = norm_layer(inner_nc) + uprelu = nn.ReLU(True) + upnorm = norm_layer(outer_nc) + + if outermost: + upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, kernel_size=4, stride=2, padding=1) + down = [downconv] + up = [uprelu, upconv, nn.Tanh()] + model = down + [submodule] + up + elif innermost: + upconv = nn.ConvTranspose2d(inner_nc, outer_nc, kernel_size=4, stride=2, padding=1, bias=use_bias) + down = [downrelu, downconv] + up = [uprelu, upconv, upnorm] + model = down + up + else: + upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, kernel_size=4, stride=2, padding=1, bias=use_bias) + down = [downrelu, downconv, downnorm] + up = [uprelu, upconv, upnorm] + + if use_dropout: + model = down + [submodule] + up + [nn.Dropout(0.5)] + else: + model = down + [submodule] + up + + self.model = nn.Sequential(*model) + + def forward(self, x): + if self.outermost: + return self.model(x) + else: # add skip connections + return torch.cat([x, self.model(x)], 1) + + +class LineartAnimeProcessor: + """Processes an image to detect lineart.""" + + def __init__(self): + model_path = hf_hub_download("lllyasviel/Annotators", "netG.pth") + norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False) + self.model = UnetGenerator(3, 1, 8, 64, norm_layer=norm_layer, use_dropout=False) + ckpt = torch.load(model_path) + for key in list(ckpt.keys()): + if "module." in key: + ckpt[key.replace("module.", "")] = ckpt[key] + del ckpt[key] + self.model.load_state_dict(ckpt) + self.model.eval() + + def to(self, device: torch.device): + self.model.to(device) + return self + + def run(self, input_image: Image.Image, detect_resolution: int = 512, image_resolution: int = 512) -> Image.Image: + """Processes an image to detect lineart. + + Args: + input_image: The input image. + detect_resolution: The resolution to use for detection. + image_resolution: The resolution to use for the output image. + + Returns: + The detected lineart. 
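        Example (illustrative; `pil_image` stands for any PIL image):
            anime_lineart = LineartAnimeProcessor().run(pil_image, detect_resolution=512, image_resolution=512)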
+ """ + device = next(iter(self.model.parameters())).device + np_image = pil_to_np(input_image) + + np_image = normalize_image_channel_count(np_image) + np_image = fit_image_to_resolution(np_image, detect_resolution) + + H, W, C = np_image.shape + Hn = 256 * int(np.ceil(float(H) / 256.0)) + Wn = 256 * int(np.ceil(float(W) / 256.0)) + img = cv2.resize(np_image, (Wn, Hn), interpolation=cv2.INTER_CUBIC) + with torch.no_grad(): + image_feed = torch.from_numpy(img).float().to(device) + image_feed = image_feed / 127.5 - 1.0 + image_feed = rearrange(image_feed, "h w c -> 1 c h w") + + line = self.model(image_feed)[0, 0] * 127.5 + 127.5 + line = line.cpu().numpy() + + line = cv2.resize(line, (W, H), interpolation=cv2.INTER_CUBIC) + line = line.clip(0, 255).astype(np.uint8) + + detected_map = line + + detected_map = normalize_image_channel_count(detected_map) + + img = fit_image_to_resolution(np_image, image_resolution) + H, W, C = img.shape + + detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) + detected_map = 255 - detected_map + + return np_to_pil(detected_map) From 3c5a06cf967ad3e072975d4eb01bcf3af1cd3d9a Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 21 Mar 2024 21:15:25 +1100 Subject: [PATCH 07/11] tidy: correct attributions for controlnet processors --- invokeai/backend/image_util/canny.py | 2 +- invokeai/backend/image_util/hed.py | 2 +- invokeai/backend/image_util/util.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/invokeai/backend/image_util/canny.py b/invokeai/backend/image_util/canny.py index e0067bd7fde..87219a9356a 100644 --- a/invokeai/backend/image_util/canny.py +++ b/invokeai/backend/image_util/canny.py @@ -14,7 +14,7 @@ def get_canny_edges( ) -> Image.Image: """Returns the edges of an image using the Canny edge detection algorithm. - This function is adapted from https://github.com/lllyasviel/ControlNet. + Adapted from https://github.com/huggingface/controlnet_aux (Apache-2.0 license). Args: image: The input image. diff --git a/invokeai/backend/image_util/hed.py b/invokeai/backend/image_util/hed.py index 36b07d43ed3..38ad424b484 100644 --- a/invokeai/backend/image_util/hed.py +++ b/invokeai/backend/image_util/hed.py @@ -1,4 +1,4 @@ -"""Adapted from https://github.com/lllyasviel/ControlNet (Apache-2.0 license).""" +"""Adapted from https://github.com/huggingface/controlnet_aux (Apache-2.0 license).""" import cv2 import numpy as np diff --git a/invokeai/backend/image_util/util.py b/invokeai/backend/image_util/util.py index 067adfa15e3..6a403efe55e 100644 --- a/invokeai/backend/image_util/util.py +++ b/invokeai/backend/image_util/util.py @@ -100,7 +100,7 @@ def normalize_image_channel_count(image: np.ndarray) -> np.ndarray: If the image has 1 channel, a third empty channel will be added. If the image has 4 channels, the alpha channel will be used to blend the image with a white background. - This function is adapted from https://github.com/lllyasviel/ControlNet. + Adapted from https://github.com/huggingface/controlnet_aux (Apache-2.0 license). Args: image: The input image. @@ -131,7 +131,7 @@ def normalize_image_channel_count(image: np.ndarray) -> np.ndarray: def fit_image_to_resolution(input_image: np.ndarray, resolution: int) -> np.ndarray: """Resizes an image, fitting it to the given resolution. - This function is adapted from https://github.com/lllyasviel/ControlNet. + Adapted from https://github.com/huggingface/controlnet_aux (Apache-2.0 license). 
Args: input_image: The input image. @@ -190,7 +190,7 @@ def safe_step(x: np.ndarray, step: int = 2) -> np.ndarray: I don't fully understand the purpose of this function, but it appears to be normalizing/quantizing the array. - This function is adapted from https://github.com/lllyasviel/ControlNet. + Adapted from https://github.com/huggingface/controlnet_aux (Apache-2.0 license). Args: x: The input array. From 3c4899b93eb905ceedef991e67bdc39bbe390d68 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 21 Mar 2024 21:45:51 +1100 Subject: [PATCH 08/11] chore(ui): typegen --- .../frontend/web/src/services/api/schema.ts | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 956e6e5bb1c..0649dfd1695 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -1682,6 +1682,12 @@ export type components = { use_cache?: boolean; /** @description The image to process */ image?: components["schemas"]["ImageField"]; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; /** * Image Resolution * @description Pixel resolution for output image @@ -4130,7 +4136,7 @@ export type components = { * @description The nodes in this graph */ nodes: { - [key: string]: components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | 
components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["CLIPSkipInvocation"] | 
components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["LatentsToImageInvocation"]; + [key: string]: components["schemas"]["RoundInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ImageLerpInvocation"] | 
components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["IPAdapterInvocation"] | 
components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["ImagePasteInvocation"]; }; /** * Edges @@ -4167,7 +4173,7 @@ export type components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["FaceMaskOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["String2Output"] | components["schemas"]["IntegerOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["LoRALoaderOutput"]; + [key: string]: components["schemas"]["IntegerOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["IdealSizeOutput"] | 
components["schemas"]["ImageOutput"] | components["schemas"]["String2Output"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["IterateInvocationOutput"]; }; /** * Errors @@ -7044,6 +7050,12 @@ export type components = { * @default 0.5 */ min_confidence?: number; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; /** * Image Resolution * @description Pixel resolution for output image @@ -7299,6 +7311,12 @@ export type components = { * @default 0.1 */ bg_th?: number; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; /** * Image Resolution * @description Pixel resolution for output image @@ -8992,6 +9010,12 @@ export type components = { use_cache?: boolean; /** @description The image to process */ image?: components["schemas"]["ImageField"]; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; /** * Image Resolution * @description Pixel resolution for output image From 4767023572fd5b672a642ae60dfb5f1748d0d3c6 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 21 Mar 2024 21:46:06 +1100 Subject: [PATCH 09/11] feat(ui): add missing detect_resolution to processors --- .../components/processors/CannyProcessor.tsx | 27 ++++++++++++++++++- .../processors/MediapipeFaceProcessor.tsx | 27 ++++++++++++++++++- .../processors/MidasDepthProcessor.tsx | 27 ++++++++++++++++++- .../controlAdapters/store/constants.ts | 3 +++ .../features/controlAdapters/store/types.ts | 6 ++--- 5 files changed, 84 insertions(+), 6 deletions(-) diff --git a/invokeai/frontend/web/src/features/controlAdapters/components/processors/CannyProcessor.tsx b/invokeai/frontend/web/src/features/controlAdapters/components/processors/CannyProcessor.tsx index 427a23963e6..0d547b9490b 100644 --- a/invokeai/frontend/web/src/features/controlAdapters/components/processors/CannyProcessor.tsx +++ b/invokeai/frontend/web/src/features/controlAdapters/components/processors/CannyProcessor.tsx @@ -15,7 +15,7 @@ type CannyProcessorProps = { const CannyProcessor = (props: CannyProcessorProps) => { const { controlNetId, processorNode, isEnabled } = props; - const { low_threshold, high_threshold, image_resolution } = processorNode; + const { low_threshold, high_threshold, image_resolution, detect_resolution } = processorNode; const processorChanged = 
   const { t } = useTranslation();
 
   const defaults = useGetDefaultForControlnetProcessor(
@@ -43,6 +43,13 @@ const CannyProcessor = (props: CannyProcessorProps) => {
     [controlNetId, processorChanged]
   );
 
+  const handleDetectResolutionChanged = useCallback(
+    (v: number) => {
+      processorChanged(controlNetId, { detect_resolution: v });
+    },
+    [controlNetId, processorChanged]
+  );
+
   return (
@@ -97,6 +104,24 @@ const CannyProcessor = (props: CannyProcessorProps) => {
           max={4096}
         />
+
+        {t('controlnet.detectResolution')}
+
+
+
   );
 };

diff --git a/invokeai/frontend/web/src/features/controlAdapters/components/processors/MediapipeFaceProcessor.tsx b/invokeai/frontend/web/src/features/controlAdapters/components/processors/MediapipeFaceProcessor.tsx
index 3cf0758504a..de35d628d7c 100644
--- a/invokeai/frontend/web/src/features/controlAdapters/components/processors/MediapipeFaceProcessor.tsx
+++ b/invokeai/frontend/web/src/features/controlAdapters/components/processors/MediapipeFaceProcessor.tsx
@@ -15,7 +15,7 @@ type Props = {
 
 const MediapipeFaceProcessor = (props: Props) => {
   const { controlNetId, processorNode, isEnabled } = props;
-  const { max_faces, min_confidence, image_resolution } = processorNode;
+  const { max_faces, min_confidence, image_resolution, detect_resolution } = processorNode;
   const processorChanged = useProcessorNodeChanged();
   const { t } = useTranslation();
 
@@ -44,6 +44,13 @@ const MediapipeFaceProcessor = (props: Props) => {
     [controlNetId, processorChanged]
   );
 
+  const handleDetectResolutionChanged = useCallback(
+    (v: number) => {
+      processorChanged(controlNetId, { detect_resolution: v });
+    },
+    [controlNetId, processorChanged]
+  );
+
   return (
@@ -102,6 +109,24 @@ const MediapipeFaceProcessor = (props: Props) => {
           max={4096}
         />
+
+        {t('controlnet.detectResolution')}
+
+
+
   );
 };

diff --git a/invokeai/frontend/web/src/features/controlAdapters/components/processors/MidasDepthProcessor.tsx b/invokeai/frontend/web/src/features/controlAdapters/components/processors/MidasDepthProcessor.tsx
index c60baea0d90..f4089ed48f3 100644
--- a/invokeai/frontend/web/src/features/controlAdapters/components/processors/MidasDepthProcessor.tsx
+++ b/invokeai/frontend/web/src/features/controlAdapters/components/processors/MidasDepthProcessor.tsx
@@ -15,7 +15,7 @@ type Props = {
 
 const MidasDepthProcessor = (props: Props) => {
   const { controlNetId, processorNode, isEnabled } = props;
-  const { a_mult, bg_th, image_resolution } = processorNode;
+  const { a_mult, bg_th, image_resolution, detect_resolution } = processorNode;
   const processorChanged = useProcessorNodeChanged();
   const { t } = useTranslation();
 
@@ -44,6 +44,13 @@ const MidasDepthProcessor = (props: Props) => {
     [controlNetId, processorChanged]
   );
 
+  const handleDetectResolutionChanged = useCallback(
+    (v: number) => {
+      processorChanged(controlNetId, { detect_resolution: v });
+    },
+    [controlNetId, processorChanged]
+  );
+
   return (
@@ -104,6 +111,24 @@ const MidasDepthProcessor = (props: Props) => {
           max={4096}
         />
+
+        {t('controlnet.detectResolution')}
+
+
+
   );
 };

diff --git a/invokeai/frontend/web/src/features/controlAdapters/store/constants.ts b/invokeai/frontend/web/src/features/controlAdapters/store/constants.ts
index a4f4eb74181..152e977e5c6 100644
--- a/invokeai/frontend/web/src/features/controlAdapters/store/constants.ts
+++ b/invokeai/frontend/web/src/features/controlAdapters/store/constants.ts
@@ -48,6 +48,7 @@ export const CONTROLNET_PROCESSORS: ControlNetProcessorsDict = {
       low_threshold: 100,
       high_threshold: 200,
       image_resolution: baseModel === 'sdxl' ? 1024 : 512,
+      detect_resolution: baseModel === 'sdxl' ? 1024 : 512,
     }),
   },
   color_map_image_processor: {
@@ -158,6 +159,7 @@ export const CONTROLNET_PROCESSORS: ControlNetProcessorsDict = {
       max_faces: 1,
       min_confidence: 0.5,
       image_resolution: baseModel === 'sdxl' ? 1024 : 512,
+      detect_resolution: baseModel === 'sdxl' ? 1024 : 512,
     }),
   },
   midas_depth_image_processor: {
@@ -174,6 +176,7 @@ export const CONTROLNET_PROCESSORS: ControlNetProcessorsDict = {
       a_mult: 2,
       bg_th: 0.1,
       image_resolution: baseModel === 'sdxl' ? 1024 : 512,
+      detect_resolution: baseModel === 'sdxl' ? 1024 : 512,
     }),
   },
   mlsd_image_processor: {
diff --git a/invokeai/frontend/web/src/features/controlAdapters/store/types.ts b/invokeai/frontend/web/src/features/controlAdapters/store/types.ts
index fb52b2a451b..93d4915cdf1 100644
--- a/invokeai/frontend/web/src/features/controlAdapters/store/types.ts
+++ b/invokeai/frontend/web/src/features/controlAdapters/store/types.ts
@@ -72,7 +72,7 @@ export const isControlAdapterProcessorType = (v: unknown): v is ControlAdapterPr
  */
 export type RequiredCannyImageProcessorInvocation = O.Required<
   CannyImageProcessorInvocation,
-  'type' | 'low_threshold' | 'high_threshold' | 'image_resolution'
+  'type' | 'low_threshold' | 'high_threshold' | 'image_resolution' | 'detect_resolution'
 >;
 
 /**
@@ -133,7 +133,7 @@ export type RequiredLineartImageProcessorInvocation = O.Required<
  */
 export type RequiredMediapipeFaceProcessorInvocation = O.Required<
   MediapipeFaceProcessorInvocation,
-  'type' | 'max_faces' | 'min_confidence' | 'image_resolution'
+  'type' | 'max_faces' | 'min_confidence' | 'image_resolution' | 'detect_resolution'
 >;
 
 /**
@@ -141,7 +141,7 @@ export type RequiredMidasDepthImageProcessorInvocation = O.Required<
   MidasDepthImageProcessorInvocation,
-  'type' | 'a_mult' | 'bg_th' | 'image_resolution'
+  'type' | 'a_mult' | 'bg_th' | 'image_resolution' | 'detect_resolution'
 >;
 
 /**

From 245d47baa559b57e494a79ab55ace4986dce38b0 Mon Sep 17 00:00:00 2001
From: psychedelicious <4822129+psychedelicious@users.noreply.github.com>
Date: Thu, 21 Mar 2024 21:50:01 +1100
Subject: [PATCH 10/11] chore: ruff

---
 invokeai/backend/image_util/hed.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/invokeai/backend/image_util/hed.py b/invokeai/backend/image_util/hed.py
index 38ad424b484..80519ce013d 100644
--- a/invokeai/backend/image_util/hed.py
+++ b/invokeai/backend/image_util/hed.py
@@ -26,7 +26,7 @@ def __init__(self, input_channel, output_channel, layer_number):
                 in_channels=input_channel, out_channels=output_channel, kernel_size=(3, 3), stride=(1, 1), padding=1
             )
         )
-        for i in range(1, layer_number):
+        for _i in range(1, layer_number):
             self.convs.append(
                 torch.nn.Conv2d(
                     in_channels=output_channel,

From 889ee8132a8c2bce95738139d2bca773b7027f22 Mon Sep 17 00:00:00 2001
From: psychedelicious <4822129+psychedelicious@users.noreply.github.com>
Date: Thu, 21 Mar 2024 23:00:29 +1100
Subject: [PATCH 11/11] tidy: "fit_image_to_resolution" -> "resize_image_to_resolution"

---
 invokeai/backend/image_util/canny.py         | 6 +++---
 invokeai/backend/image_util/hed.py           | 6 +++---
 invokeai/backend/image_util/lineart.py       | 6 +++---
 invokeai/backend/image_util/lineart_anime.py | 6 +++---
 invokeai/backend/image_util/util.py          | 2 +-
 5 files changed, 13 insertions(+), 13 deletions(-)
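Reviewer note: every processor touched by the diffs below follows the same two-step pattern — fit the input to `detect_resolution` before the detector runs, then fit the detector output to `image_resolution`. As a rough end-to-end sketch of that flow for the Canny case (the helper and parameter names are taken from the diffs below; `canny_flow_sketch` itself is illustrative, the real implementation is `get_canny_edges` in canny.py):

```python
import cv2
from PIL import Image

from invokeai.backend.image_util.util import (
    cv2_to_pil,
    normalize_image_channel_count,
    pil_to_cv2,
    resize_image_to_resolution,
)


def canny_flow_sketch(
    image: Image.Image,
    low_threshold: int = 100,
    high_threshold: int = 200,
    detect_resolution: int = 512,
    image_resolution: int = 512,
) -> Image.Image:
    # Illustrative only -- see get_canny_edges in the canny.py diff below.
    np_image = pil_to_cv2(image)
    np_image = normalize_image_channel_count(np_image)
    # 1. Run the detector at detect_resolution.
    np_image = resize_image_to_resolution(np_image, detect_resolution)
    edge_map = cv2.Canny(np_image, low_threshold, high_threshold)
    edge_map = normalize_image_channel_count(edge_map)
    # 2. Fit the resulting control image to image_resolution.
    edge_map = resize_image_to_resolution(edge_map, image_resolution)
    return cv2_to_pil(edge_map)
```

The other processors in this series have the same shape, swapping the Canny step for their own detector.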
diff --git a/invokeai/backend/image_util/canny.py b/invokeai/backend/image_util/canny.py
index 87219a9356a..c1628dc1828 100644
--- a/invokeai/backend/image_util/canny.py
+++ b/invokeai/backend/image_util/canny.py
@@ -3,9 +3,9 @@
 from invokeai.backend.image_util.util import (
     cv2_to_pil,
-    fit_image_to_resolution,
     normalize_image_channel_count,
     pil_to_cv2,
+    resize_image_to_resolution,
 )
 
@@ -32,10 +32,10 @@ def get_canny_edges(
     np_image = pil_to_cv2(image)
     np_image = normalize_image_channel_count(np_image)
-    np_image = fit_image_to_resolution(np_image, detect_resolution)
+    np_image = resize_image_to_resolution(np_image, detect_resolution)
 
     edge_map = cv2.Canny(np_image, low_threshold, high_threshold)
     edge_map = normalize_image_channel_count(edge_map)
-    edge_map = fit_image_to_resolution(edge_map, image_resolution)
+    edge_map = resize_image_to_resolution(edge_map, image_resolution)
 
     return cv2_to_pil(edge_map)

diff --git a/invokeai/backend/image_util/hed.py b/invokeai/backend/image_util/hed.py
index 80519ce013d..378e3b96e90 100644
--- a/invokeai/backend/image_util/hed.py
+++ b/invokeai/backend/image_util/hed.py
@@ -8,11 +8,11 @@ from PIL import Image
 
 from invokeai.backend.image_util.util import (
-    fit_image_to_resolution,
     non_maximum_suppression,
     normalize_image_channel_count,
     np_to_pil,
     pil_to_np,
+    resize_image_to_resolution,
     safe_step,
 )
 
@@ -109,7 +109,7 @@ def run(
         device = next(iter(self.network.parameters())).device
         np_image = pil_to_np(input_image)
         np_image = normalize_image_channel_count(np_image)
-        np_image = fit_image_to_resolution(np_image, detect_resolution)
+        np_image = resize_image_to_resolution(np_image, detect_resolution)
 
         assert np_image.ndim == 3
         height, width, _channels = np_image.shape
@@ -128,7 +128,7 @@ def run(
         detected_map = edge
         detected_map = normalize_image_channel_count(detected_map)
 
-        img = fit_image_to_resolution(np_image, image_resolution)
+        img = resize_image_to_resolution(np_image, image_resolution)
         height, width, _channels = img.shape
 
         detected_map = cv2.resize(detected_map, (width, height), interpolation=cv2.INTER_LINEAR)

diff --git a/invokeai/backend/image_util/lineart.py b/invokeai/backend/image_util/lineart.py
index 0a17add422d..3d19262822e 100644
--- a/invokeai/backend/image_util/lineart.py
+++ b/invokeai/backend/image_util/lineart.py
@@ -9,10 +9,10 @@ from PIL import Image
 
 from invokeai.backend.image_util.util import (
-    fit_image_to_resolution,
     normalize_image_channel_count,
     np_to_pil,
     pil_to_np,
+    resize_image_to_resolution,
 )
 
@@ -131,7 +131,7 @@
         np_image = pil_to_np(input_image)
         np_image = normalize_image_channel_count(np_image)
-        np_image = fit_image_to_resolution(np_image, detect_resolution)
+        np_image = resize_image_to_resolution(np_image, detect_resolution)
 
         model = self.model_coarse if coarse else self.model
         assert np_image.ndim == 3
@@ -149,7 +149,7 @@
         detected_map = normalize_image_channel_count(detected_map)
 
-        img = fit_image_to_resolution(np_image, image_resolution)
+        img = resize_image_to_resolution(np_image, image_resolution)
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/invokeai/backend/image_util/lineart_anime.py b/invokeai/backend/image_util/lineart_anime.py
index f547aac0d0a..5185d92c512 100644
--- a/invokeai/backend/image_util/lineart_anime.py
+++ b/invokeai/backend/image_util/lineart_anime.py
@@ -12,10 +12,10 @@ from PIL import Image
 
 from invokeai.backend.image_util.util import (
-    fit_image_to_resolution,
     normalize_image_channel_count,
     np_to_pil,
     pil_to_np,
+    resize_image_to_resolution,
 )
 
@@ -173,7 +173,7 @@ def run(self, input_image: Image.Image, detect_resolution: int = 512, image_reso
         np_image = pil_to_np(input_image)
         np_image = normalize_image_channel_count(np_image)
-        np_image = fit_image_to_resolution(np_image, detect_resolution)
+        np_image = resize_image_to_resolution(np_image, detect_resolution)
 
         H, W, C = np_image.shape
         Hn = 256 * int(np.ceil(float(H) / 256.0))
@@ -194,7 +194,7 @@ def run(self, input_image: Image.Image, detect_resolution: int = 512, image_reso
         detected_map = normalize_image_channel_count(detected_map)
 
-        img = fit_image_to_resolution(np_image, image_resolution)
+        img = resize_image_to_resolution(np_image, image_resolution)
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/invokeai/backend/image_util/util.py b/invokeai/backend/image_util/util.py
index 6a403efe55e..7cfe0ad1a5f 100644
--- a/invokeai/backend/image_util/util.py
+++ b/invokeai/backend/image_util/util.py
@@ -128,7 +128,7 @@ def normalize_image_channel_count(image: np.ndarray) -> np.ndarray:
         raise ValueError("Invalid number of channels.")
 
 
-def fit_image_to_resolution(input_image: np.ndarray, resolution: int) -> np.ndarray:
+def resize_image_to_resolution(input_image: np.ndarray, resolution: int) -> np.ndarray:
     """Resizes an image, fitting it to the given resolution.
 
     Adapted from https://github.com/huggingface/controlnet_aux (Apache-2.0 license).
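A closing note on the rename: the docstring above says the helper is adapted from huggingface/controlnet_aux. Assuming `resize_image_to_resolution` keeps that library's convention — scale the short side to `resolution` and snap both dimensions to multiples of 64 — its behaviour is roughly the following sketch (the authoritative version is the function in util.py above; this is for illustration only):

```python
import cv2
import numpy as np


def resize_image_to_resolution_sketch(input_image: np.ndarray, resolution: int) -> np.ndarray:
    # Assumption: mirrors controlnet_aux's resize_image -- scale the short side to
    # `resolution`, snap both dimensions to multiples of 64, and choose the interpolation
    # based on whether the image is being enlarged or shrunk.
    height, width = input_image.shape[:2]
    scale = float(resolution) / float(min(height, width))
    new_height = int(np.round(height * scale / 64.0)) * 64
    new_width = int(np.round(width * scale / 64.0)) * 64
    interpolation = cv2.INTER_LANCZOS4 if scale > 1 else cv2.INTER_AREA
    return cv2.resize(input_image, (new_width, new_height), interpolation=interpolation)
```

The multiple-of-64 snapping is what keeps detector inputs and control images at dimensions the downstream models expect, which is why the processors lean on this one helper for both `detect_resolution` and `image_resolution`.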