diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py index e302c2b97a0..40a667c9d0d 100644 --- a/invokeai/app/invocations/ip_adapter.py +++ b/invokeai/app/invocations/ip_adapter.py @@ -1,21 +1,22 @@ from builtins import float -from typing import List, Union +from typing import List, Literal, Union from pydantic import BaseModel, Field, field_validator, model_validator from typing_extensions import Self -from invokeai.app.invocations.baseinvocation import ( - BaseInvocation, - BaseInvocationOutput, - invocation, - invocation_output, -) +from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType from invokeai.app.invocations.model import ModelIdentifierField from invokeai.app.invocations.primitives import ImageField from invokeai.app.invocations.util import validate_begin_end_step, validate_weights from invokeai.app.services.shared.invocation_context import InvocationContext -from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, IPAdapterConfig, ModelType +from invokeai.backend.model_manager.config import ( + AnyModelConfig, + BaseModelType, + IPAdapterCheckpointConfig, + IPAdapterInvokeAIConfig, + ModelType, +) class IPAdapterField(BaseModel): @@ -48,12 +49,15 @@ class IPAdapterOutput(BaseInvocationOutput): ip_adapter: IPAdapterField = OutputField(description=FieldDescriptions.ip_adapter, title="IP-Adapter") +CLIP_VISION_MODEL_MAP = {"ViT-H": "ip_adapter_sd_image_encoder", "ViT-G": "ip_adapter_sdxl_image_encoder"} + + @invocation("ip_adapter", title="IP-Adapter", tags=["ip_adapter", "control"], category="ip_adapter", version="1.2.2") class IPAdapterInvocation(BaseInvocation): """Collects IP-Adapter info to pass to other nodes.""" # Inputs - image: Union[ImageField, List[ImageField]] = InputField(description="The IP-Adapter image prompt(s).") + image: Union[ImageField, List[ImageField]] = InputField(description="The IP-Adapter image prompt(s).", ui_order=1) ip_adapter_model: ModelIdentifierField = InputField( description="The IP-Adapter model.", title="IP-Adapter Model", @@ -61,7 +65,11 @@ class IPAdapterInvocation(BaseInvocation): ui_order=-1, ui_type=UIType.IPAdapterModel, ) - + clip_vision_model: Literal["auto", "ViT-H", "ViT-G"] = InputField( + description="CLIP Vision model to use. Overrides model settings. Mandatory for checkpoint models.", + default="auto", + ui_order=2, + ) weight: Union[float, List[float]] = InputField( default=1, description="The weight given to the IP-Adapter", title="Weight" ) @@ -86,10 +94,21 @@ def validate_begin_end_step_percent(self) -> Self: def invoke(self, context: InvocationContext) -> IPAdapterOutput: # Lookup the CLIP Vision encoder that is intended to be used with the IP-Adapter model. 
ip_adapter_info = context.models.get_config(self.ip_adapter_model.key) - assert isinstance(ip_adapter_info, IPAdapterConfig) - image_encoder_model_id = ip_adapter_info.image_encoder_model_id - image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip() + assert isinstance(ip_adapter_info, (IPAdapterInvokeAIConfig, IPAdapterCheckpointConfig)) + + if self.clip_vision_model == "auto": + if isinstance(ip_adapter_info, IPAdapterInvokeAIConfig): + image_encoder_model_id = ip_adapter_info.image_encoder_model_id + image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip() + else: + raise RuntimeError( + "You need to set the appropriate CLIP Vision model for checkpoint IP Adapter models." + ) + else: + image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model] + image_encoder_model = self._get_image_encoder(context, image_encoder_model_name) + return IPAdapterOutput( ip_adapter=IPAdapterField( image=self.image, @@ -102,19 +121,25 @@ def invoke(self, context: InvocationContext) -> IPAdapterOutput: ) def _get_image_encoder(self, context: InvocationContext, image_encoder_model_name: str) -> AnyModelConfig: - found = False - while not found: + image_encoder_models = context.models.search_by_attrs( + name=image_encoder_model_name, base=BaseModelType.Any, type=ModelType.CLIPVision + ) + + if not len(image_encoder_models) > 0: + context.logger.warning( + f"The image encoder required by this IP Adapter ({image_encoder_model_name}) is not installed. \ + Downloading and installing now. This may take a while." + ) + + installer = context._services.model_manager.install + job = installer.heuristic_import(f"InvokeAI/{image_encoder_model_name}") + installer.wait_for_job(job, timeout=600) # Wait for up to 10 minutes image_encoder_models = context.models.search_by_attrs( name=image_encoder_model_name, base=BaseModelType.Any, type=ModelType.CLIPVision ) - found = len(image_encoder_models) > 0 - if not found: - context.logger.warning( - f"The image encoder required by this IP Adapter ({image_encoder_model_name}) is not installed." - ) - context.logger.warning("Downloading and installing now. 
This may take a while.") - installer = context._services.model_manager.install - job = installer.heuristic_import(f"InvokeAI/{image_encoder_model_name}") - installer.wait_for_job(job, timeout=600) # wait up to 10 minutes - then raise a TimeoutException - assert len(image_encoder_models) == 1 + + if len(image_encoder_models) == 0: + context.logger.error("Error while fetching CLIP Vision Image Encoder") + assert len(image_encoder_models) == 1 + return image_encoder_models[0] diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index bc79efdeba4..3c66b7014f5 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -43,11 +43,7 @@ WithMetadata, ) from invokeai.app.invocations.ip_adapter import IPAdapterField -from invokeai.app.invocations.primitives import ( - DenoiseMaskOutput, - ImageOutput, - LatentsOutput, -) +from invokeai.app.invocations.primitives import DenoiseMaskOutput, ImageOutput, LatentsOutput from invokeai.app.invocations.t2i_adapter import T2IAdapterField from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.util.controlnet_utils import prepare_control_image @@ -68,12 +64,7 @@ ) from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP from ...backend.util.devices import choose_precision, choose_torch_device -from .baseinvocation import ( - BaseInvocation, - BaseInvocationOutput, - invocation, - invocation_output, -) +from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output from .controlnet_image_processors import ControlField from .model import ModelIdentifierField, UNetField, VAEField diff --git a/invokeai/app/invocations/metadata.py b/invokeai/app/invocations/metadata.py index 6fc72a1c3fc..2da482c8331 100644 --- a/invokeai/app/invocations/metadata.py +++ b/invokeai/app/invocations/metadata.py @@ -2,16 +2,8 @@ from pydantic import BaseModel, ConfigDict, Field -from invokeai.app.invocations.baseinvocation import ( - BaseInvocation, - BaseInvocationOutput, - invocation, - invocation_output, -) -from invokeai.app.invocations.controlnet_image_processors import ( - CONTROLNET_MODE_VALUES, - CONTROLNET_RESIZE_VALUES, -) +from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output +from invokeai.app.invocations.controlnet_image_processors import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES from invokeai.app.invocations.fields import ( FieldDescriptions, ImageField, @@ -43,6 +35,7 @@ class IPAdapterMetadataField(BaseModel): image: ImageField = Field(description="The IP-Adapter image prompt.") ip_adapter_model: ModelIdentifierField = Field(description="The IP-Adapter model.") + clip_vision_model: Literal["ViT-H", "ViT-G"] = Field(description="The CLIP Vision model") weight: Union[float, list[float]] = Field(description="The weight given to the IP-Adapter") begin_step_percent: float = Field(description="When the IP-Adapter is first applied (% of total steps)") end_step_percent: float = Field(description="When the IP-Adapter is last applied (% of total steps)") diff --git a/invokeai/backend/ip_adapter/ip_adapter.py b/invokeai/backend/ip_adapter/ip_adapter.py index e51966c779c..f3be0421469 100644 --- a/invokeai/backend/ip_adapter/ip_adapter.py +++ b/invokeai/backend/ip_adapter/ip_adapter.py @@ -1,8 +1,11 @@ # copied from https://github.com/tencent-ailab/IP-Adapter (Apache License 2.0) # and modified as needed -from typing import Optional, Union +import pathlib +from typing 
import List, Optional, TypedDict, Union +import safetensors +import safetensors.torch import torch from PIL import Image from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection @@ -13,10 +16,17 @@ from .resampler import Resampler +class IPAdapterStateDict(TypedDict): + ip_adapter: dict[str, torch.Tensor] + image_proj: dict[str, torch.Tensor] + + class ImageProjModel(torch.nn.Module): """Image Projection Model""" - def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024, clip_extra_context_tokens=4): + def __init__( + self, cross_attention_dim: int = 1024, clip_embeddings_dim: int = 1024, clip_extra_context_tokens: int = 4 + ): super().__init__() self.cross_attention_dim = cross_attention_dim @@ -25,7 +35,7 @@ def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024, clip_extr self.norm = torch.nn.LayerNorm(cross_attention_dim) @classmethod - def from_state_dict(cls, state_dict: dict[torch.Tensor], clip_extra_context_tokens=4): + def from_state_dict(cls, state_dict: dict[str, torch.Tensor], clip_extra_context_tokens: int = 4): """Initialize an ImageProjModel from a state_dict. The cross_attention_dim and clip_embeddings_dim are inferred from the shape of the tensors in the state_dict. @@ -45,7 +55,7 @@ def from_state_dict(cls, state_dict: dict[torch.Tensor], clip_extra_context_toke model.load_state_dict(state_dict) return model - def forward(self, image_embeds): + def forward(self, image_embeds: torch.Tensor): embeds = image_embeds clip_extra_context_tokens = self.proj(embeds).reshape( -1, self.clip_extra_context_tokens, self.cross_attention_dim @@ -57,7 +67,7 @@ def forward(self, image_embeds): class MLPProjModel(torch.nn.Module): """SD model with image prompt""" - def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024): + def __init__(self, cross_attention_dim: int = 1024, clip_embeddings_dim: int = 1024): super().__init__() self.proj = torch.nn.Sequential( @@ -68,7 +78,7 @@ def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024): ) @classmethod - def from_state_dict(cls, state_dict: dict[torch.Tensor]): + def from_state_dict(cls, state_dict: dict[str, torch.Tensor]): """Initialize an MLPProjModel from a state_dict. The cross_attention_dim and clip_embeddings_dim are inferred from the shape of the tensors in the state_dict. 
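A minimal sketch of the IPAdapterStateDict layout these from_state_dict constructors consume, assuming a flat checkpoint whose keys carry 'image_proj.' / 'ip_adapter.' prefixes; the key-splitting mirrors the load_ip_adapter_tensors helper added later in this patch, and the tensor shapes (an SD1-style ImageProjModel with cross-attention dim 768 and ViT-H image-embedding dim 1024) are illustrative only.

from typing import TypedDict

import torch


class IPAdapterStateDict(TypedDict):
    ip_adapter: dict[str, torch.Tensor]
    image_proj: dict[str, torch.Tensor]


def split_flat_state_dict(flat: dict[str, torch.Tensor]) -> IPAdapterStateDict:
    """Group a flat checkpoint's tensors under 'image_proj' / 'ip_adapter'."""
    state_dict: IPAdapterStateDict = {"ip_adapter": {}, "image_proj": {}}
    for key, tensor in flat.items():
        if key.startswith("image_proj."):
            state_dict["image_proj"][key.replace("image_proj.", "")] = tensor
        elif key.startswith("ip_adapter."):
            state_dict["ip_adapter"][key.replace("ip_adapter.", "")] = tensor
        else:
            raise RuntimeError(f"Encountered unexpected IP Adapter state dict key: '{key}'.")
    return state_dict


# Illustrative tensors only (hypothetical shapes for an SD1 ImageProjModel checkpoint).
flat = {
    "image_proj.proj.weight": torch.zeros(4 * 768, 1024),  # (tokens * cross_attention_dim, clip_embeddings_dim)
    "image_proj.proj.bias": torch.zeros(4 * 768),
    "image_proj.norm.weight": torch.zeros(768),
    "image_proj.norm.bias": torch.zeros(768),
    "ip_adapter.1.to_k_ip.weight": torch.zeros(320, 768),  # last dim = UNet cross-attention dim (768 for SD1)
}
nested = split_flat_state_dict(flat)
print(sorted(nested["image_proj"]))  # ['norm.bias', 'norm.weight', 'proj.bias', 'proj.weight']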
@@ -87,7 +97,7 @@ def from_state_dict(cls, state_dict: dict[torch.Tensor]): model.load_state_dict(state_dict) return model - def forward(self, image_embeds): + def forward(self, image_embeds: torch.Tensor): clip_extra_context_tokens = self.proj(image_embeds) return clip_extra_context_tokens @@ -97,7 +107,7 @@ class IPAdapter(RawModel): def __init__( self, - state_dict: dict[str, torch.Tensor], + state_dict: IPAdapterStateDict, device: torch.device, dtype: torch.dtype = torch.float16, num_tokens: int = 4, @@ -129,24 +139,27 @@ def calc_size(self): return calc_model_size_by_data(self._image_proj_model) + calc_model_size_by_data(self.attn_weights) - def _init_image_proj_model(self, state_dict): + def _init_image_proj_model( + self, state_dict: dict[str, torch.Tensor] + ) -> Union[ImageProjModel, Resampler, MLPProjModel]: return ImageProjModel.from_state_dict(state_dict, self._num_tokens).to(self.device, dtype=self.dtype) @torch.inference_mode() - def get_image_embeds(self, pil_image, image_encoder: CLIPVisionModelWithProjection): - if isinstance(pil_image, Image.Image): - pil_image = [pil_image] + def get_image_embeds(self, pil_image: List[Image.Image], image_encoder: CLIPVisionModelWithProjection): clip_image = self._clip_image_processor(images=pil_image, return_tensors="pt").pixel_values clip_image_embeds = image_encoder(clip_image.to(self.device, dtype=self.dtype)).image_embeds - image_prompt_embeds = self._image_proj_model(clip_image_embeds) - uncond_image_prompt_embeds = self._image_proj_model(torch.zeros_like(clip_image_embeds)) - return image_prompt_embeds, uncond_image_prompt_embeds + try: + image_prompt_embeds = self._image_proj_model(clip_image_embeds) + uncond_image_prompt_embeds = self._image_proj_model(torch.zeros_like(clip_image_embeds)) + return image_prompt_embeds, uncond_image_prompt_embeds + except RuntimeError as e: + raise RuntimeError("Selected CLIP Vision Model is incompatible with the current IP Adapter") from e class IPAdapterPlus(IPAdapter): """IP-Adapter with fine-grained features""" - def _init_image_proj_model(self, state_dict): + def _init_image_proj_model(self, state_dict: dict[str, torch.Tensor]) -> Union[Resampler, MLPProjModel]: return Resampler.from_state_dict( state_dict=state_dict, depth=4, @@ -157,31 +170,32 @@ def _init_image_proj_model(self, state_dict): ).to(self.device, dtype=self.dtype) @torch.inference_mode() - def get_image_embeds(self, pil_image, image_encoder: CLIPVisionModelWithProjection): - if isinstance(pil_image, Image.Image): - pil_image = [pil_image] + def get_image_embeds(self, pil_image: List[Image.Image], image_encoder: CLIPVisionModelWithProjection): clip_image = self._clip_image_processor(images=pil_image, return_tensors="pt").pixel_values clip_image = clip_image.to(self.device, dtype=self.dtype) clip_image_embeds = image_encoder(clip_image, output_hidden_states=True).hidden_states[-2] - image_prompt_embeds = self._image_proj_model(clip_image_embeds) uncond_clip_image_embeds = image_encoder(torch.zeros_like(clip_image), output_hidden_states=True).hidden_states[ -2 ] - uncond_image_prompt_embeds = self._image_proj_model(uncond_clip_image_embeds) - return image_prompt_embeds, uncond_image_prompt_embeds + try: + image_prompt_embeds = self._image_proj_model(clip_image_embeds) + uncond_image_prompt_embeds = self._image_proj_model(uncond_clip_image_embeds) + return image_prompt_embeds, uncond_image_prompt_embeds + except RuntimeError as e: + raise RuntimeError("Selected CLIP Vision Model is incompatible with the current IP Adapter") from e 
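Because the single-image coercion was removed from both get_image_embeds overrides above, callers are now responsible for passing a list of PIL images. A minimal sketch of the calling convention, assuming ip_adapter and image_encoder are objects already built elsewhere (their construction is outside this hunk):

from typing import List, Tuple, Union

import torch
from PIL import Image
from transformers import CLIPVisionModelWithProjection


def embed_image_prompts(
    ip_adapter,  # an already-built IPAdapter / IPAdapterPlus instance (assumed available)
    image_encoder: CLIPVisionModelWithProjection,
    images: Union[Image.Image, List[Image.Image]],
) -> Tuple[torch.Tensor, torch.Tensor]:
    # get_image_embeds() now always expects List[Image.Image], so wrap a bare image here.
    pil_images = [images] if isinstance(images, Image.Image) else images
    # Returns (image_prompt_embeds, uncond_image_prompt_embeds); per the hunk above, a
    # RuntimeError is raised when the selected CLIP Vision encoder does not match the
    # IP-Adapter's projection model.
    return ip_adapter.get_image_embeds(pil_images, image_encoder)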
class IPAdapterFull(IPAdapterPlus): """IP-Adapter Plus with full features.""" - def _init_image_proj_model(self, state_dict: dict[torch.Tensor]): + def _init_image_proj_model(self, state_dict: dict[str, torch.Tensor]): return MLPProjModel.from_state_dict(state_dict).to(self.device, dtype=self.dtype) class IPAdapterPlusXL(IPAdapterPlus): """IP-Adapter Plus for SDXL.""" - def _init_image_proj_model(self, state_dict): + def _init_image_proj_model(self, state_dict: dict[str, torch.Tensor]): return Resampler.from_state_dict( state_dict=state_dict, depth=4, @@ -192,24 +206,48 @@ def _init_image_proj_model(self, state_dict): ).to(self.device, dtype=self.dtype) +def load_ip_adapter_tensors(ip_adapter_ckpt_path: pathlib.Path, device: str) -> IPAdapterStateDict: + state_dict: IPAdapterStateDict = {"ip_adapter": {}, "image_proj": {}} + + if ip_adapter_ckpt_path.suffix == ".safetensors": + model = safetensors.torch.load_file(ip_adapter_ckpt_path, device=device) + for key in model.keys(): + if key.startswith("image_proj."): + state_dict["image_proj"][key.replace("image_proj.", "")] = model[key] + elif key.startswith("ip_adapter."): + state_dict["ip_adapter"][key.replace("ip_adapter.", "")] = model[key] + else: + raise RuntimeError(f"Encountered unexpected IP Adapter state dict key: '{key}'.") + else: + ip_adapter_diffusers_checkpoint_path = ip_adapter_ckpt_path / "ip_adapter.bin" + state_dict = torch.load(ip_adapter_diffusers_checkpoint_path, map_location="cpu") + + return state_dict + + def build_ip_adapter( - ip_adapter_ckpt_path: str, device: torch.device, dtype: torch.dtype = torch.float16 -) -> Union[IPAdapter, IPAdapterPlus]: - state_dict = torch.load(ip_adapter_ckpt_path, map_location="cpu") + ip_adapter_ckpt_path: pathlib.Path, device: torch.device, dtype: torch.dtype = torch.float16 +) -> Union[IPAdapter, IPAdapterPlus, IPAdapterPlusXL, IPAdapterFull]: + state_dict = load_ip_adapter_tensors(ip_adapter_ckpt_path, device.type) - if "proj.weight" in state_dict["image_proj"]: # IPAdapter (with ImageProjModel). + # IPAdapter (with ImageProjModel) + if "proj.weight" in state_dict["image_proj"]: return IPAdapter(state_dict, device=device, dtype=dtype) - elif "proj_in.weight" in state_dict["image_proj"]: # IPAdaterPlus or IPAdapterPlusXL (with Resampler). + + # IPAdapterPlus or IPAdapterPlusXL (with Resampler) + elif "proj_in.weight" in state_dict["image_proj"]: cross_attention_dim = state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[-1] if cross_attention_dim == 768: - # SD1 IP-Adapter Plus - return IPAdapterPlus(state_dict, device=device, dtype=dtype) + return IPAdapterPlus(state_dict, device=device, dtype=dtype) # SD1 IP-Adapter Plus elif cross_attention_dim == 2048: - # SDXL IP-Adapter Plus - return IPAdapterPlusXL(state_dict, device=device, dtype=dtype) + return IPAdapterPlusXL(state_dict, device=device, dtype=dtype) # SDXL IP-Adapter Plus else: raise Exception(f"Unsupported IP-Adapter Plus cross-attention dimension: {cross_attention_dim}.") - elif "proj.0.weight" in state_dict["image_proj"]: # IPAdapterFull (with MLPProjModel). 
+ + # IPAdapterFull (with MLPProjModel) + elif "proj.0.weight" in state_dict["image_proj"]: return IPAdapterFull(state_dict, device=device, dtype=dtype) + + # Unrecognized IP Adapter Architectures else: raise ValueError(f"'{ip_adapter_ckpt_path}' has an unrecognized IP-Adapter model architecture.") diff --git a/invokeai/backend/ip_adapter/resampler.py b/invokeai/backend/ip_adapter/resampler.py index a8db22c0fd9..a32eeacfdc2 100644 --- a/invokeai/backend/ip_adapter/resampler.py +++ b/invokeai/backend/ip_adapter/resampler.py @@ -9,8 +9,8 @@ # FFN -def FeedForward(dim, mult=4): - inner_dim = int(dim * mult) +def FeedForward(dim: int, mult: int = 4): + inner_dim = dim * mult return nn.Sequential( nn.LayerNorm(dim), nn.Linear(dim, inner_dim, bias=False), @@ -19,8 +19,8 @@ def FeedForward(dim, mult=4): ) -def reshape_tensor(x, heads): - bs, length, width = x.shape +def reshape_tensor(x: torch.Tensor, heads: int): + bs, length, _ = x.shape # (bs, length, width) --> (bs, length, n_heads, dim_per_head) x = x.view(bs, length, heads, -1) # (bs, length, n_heads, dim_per_head) --> (bs, n_heads, length, dim_per_head) @@ -31,7 +31,7 @@ def reshape_tensor(x, heads): class PerceiverAttention(nn.Module): - def __init__(self, *, dim, dim_head=64, heads=8): + def __init__(self, *, dim: int, dim_head: int = 64, heads: int = 8): super().__init__() self.scale = dim_head**-0.5 self.dim_head = dim_head @@ -45,7 +45,7 @@ def __init__(self, *, dim, dim_head=64, heads=8): self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False) self.to_out = nn.Linear(inner_dim, dim, bias=False) - def forward(self, x, latents): + def forward(self, x: torch.Tensor, latents: torch.Tensor): """ Args: x (torch.Tensor): image features @@ -80,14 +80,14 @@ def forward(self, x, latents): class Resampler(nn.Module): def __init__( self, - dim=1024, - depth=8, - dim_head=64, - heads=16, - num_queries=8, - embedding_dim=768, - output_dim=1024, - ff_mult=4, + dim: int = 1024, + depth: int = 8, + dim_head: int = 64, + heads: int = 16, + num_queries: int = 8, + embedding_dim: int = 768, + output_dim: int = 1024, + ff_mult: int = 4, ): super().__init__() @@ -110,7 +110,15 @@ def __init__( ) @classmethod - def from_state_dict(cls, state_dict: dict[torch.Tensor], depth=8, dim_head=64, heads=16, num_queries=8, ff_mult=4): + def from_state_dict( + cls, + state_dict: dict[str, torch.Tensor], + depth: int = 8, + dim_head: int = 64, + heads: int = 16, + num_queries: int = 8, + ff_mult: int = 4, + ): """A convenience function that initializes a Resampler from a state_dict. Some of the shape parameters are inferred from the state_dict (e.g. dim, embedding_dim, etc.). 
At the time of @@ -145,7 +153,7 @@ def from_state_dict(cls, state_dict: dict[torch.Tensor], depth=8, dim_head=64, h model.load_state_dict(state_dict) return model - def forward(self, x): + def forward(self, x: torch.Tensor): latents = self.latents.repeat(x.size(0), 1, 1) x = self.proj_in(x) diff --git a/invokeai/backend/model_manager/config.py b/invokeai/backend/model_manager/config.py index 524e39b2a19..82f88c0e817 100644 --- a/invokeai/backend/model_manager/config.py +++ b/invokeai/backend/model_manager/config.py @@ -323,10 +323,13 @@ def get_tag() -> Tag: return Tag(f"{ModelType.Main.value}.{ModelFormat.Diffusers.value}") -class IPAdapterConfig(ModelConfigBase): - """Model config for IP Adaptor format models.""" - +class IPAdapterBaseConfig(ModelConfigBase): type: Literal[ModelType.IPAdapter] = ModelType.IPAdapter + + +class IPAdapterInvokeAIConfig(IPAdapterBaseConfig): + """Model config for IP Adapter diffusers format models.""" + image_encoder_model_id: str format: Literal[ModelFormat.InvokeAI] @@ -335,6 +338,16 @@ def get_tag() -> Tag: return Tag(f"{ModelType.IPAdapter.value}.{ModelFormat.InvokeAI.value}") +class IPAdapterCheckpointConfig(IPAdapterBaseConfig): + """Model config for IP Adapter checkpoint format models.""" + + format: Literal[ModelFormat.Checkpoint] + + @staticmethod + def get_tag() -> Tag: + return Tag(f"{ModelType.IPAdapter.value}.{ModelFormat.Checkpoint.value}") + + class CLIPVisionDiffusersConfig(DiffusersConfigBase): """Model config for CLIPVision.""" @@ -390,7 +403,8 @@ def get_model_discriminator_value(v: Any) -> str: Annotated[LoRADiffusersConfig, LoRADiffusersConfig.get_tag()], Annotated[TextualInversionFileConfig, TextualInversionFileConfig.get_tag()], Annotated[TextualInversionFolderConfig, TextualInversionFolderConfig.get_tag()], - Annotated[IPAdapterConfig, IPAdapterConfig.get_tag()], + Annotated[IPAdapterInvokeAIConfig, IPAdapterInvokeAIConfig.get_tag()], + Annotated[IPAdapterCheckpointConfig, IPAdapterCheckpointConfig.get_tag()], Annotated[T2IAdapterConfig, T2IAdapterConfig.get_tag()], Annotated[CLIPVisionDiffusersConfig, CLIPVisionDiffusersConfig.get_tag()], ], diff --git a/invokeai/backend/model_manager/load/model_loaders/ip_adapter.py b/invokeai/backend/model_manager/load/model_loaders/ip_adapter.py index 89dd46e929b..55eed81fcd5 100644 --- a/invokeai/backend/model_manager/load/model_loaders/ip_adapter.py +++ b/invokeai/backend/model_manager/load/model_loaders/ip_adapter.py @@ -7,19 +7,13 @@ import torch from invokeai.backend.ip_adapter.ip_adapter import build_ip_adapter -from invokeai.backend.model_manager import ( - AnyModel, - AnyModelConfig, - BaseModelType, - ModelFormat, - ModelType, - SubModelType, -) +from invokeai.backend.model_manager import AnyModel, AnyModelConfig, BaseModelType, ModelFormat, ModelType, SubModelType from invokeai.backend.model_manager.load import ModelLoader, ModelLoaderRegistry from invokeai.backend.raw_model import RawModel @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.IPAdapter, format=ModelFormat.InvokeAI) +@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.IPAdapter, format=ModelFormat.Checkpoint) class IPAdapterInvokeAILoader(ModelLoader): """Class to load IP Adapter diffusers models.""" @@ -32,7 +26,7 @@ def _load_model( raise ValueError("There are no submodels in an IP-Adapter model.") model_path = Path(config.path) model: RawModel = build_ip_adapter( - ip_adapter_ckpt_path=str(model_path / "ip_adapter.bin"), + ip_adapter_ckpt_path=model_path, device=torch.device("cpu"), 
dtype=self._torch_dtype, ) diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index ddd9e99eda9..7aa650afaa4 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -230,9 +230,10 @@ def get_model_type_from_checkpoint(cls, model_path: Path, checkpoint: Optional[C return ModelType.LoRA elif any(key.startswith(v) for v in {"controlnet", "control_model", "input_blocks"}): return ModelType.ControlNet + elif any(key.startswith(v) for v in {"image_proj.", "ip_adapter."}): + return ModelType.IPAdapter elif key in {"emb_params", "string_to_param"}: return ModelType.TextualInversion - else: # diffusers-ti if len(ckpt) < 10 and all(isinstance(v, torch.Tensor) for v in ckpt.values()): @@ -527,8 +528,25 @@ def get_base_type(self) -> BaseModelType: class IPAdapterCheckpointProbe(CheckpointProbeBase): + """Class for probing IP Adapters""" + def get_base_type(self) -> BaseModelType: - raise NotImplementedError() + checkpoint = self.checkpoint + for key in checkpoint.keys(): + if not key.startswith(("image_proj.", "ip_adapter.")): + continue + cross_attention_dim = checkpoint["ip_adapter.1.to_k_ip.weight"].shape[-1] + if cross_attention_dim == 768: + return BaseModelType.StableDiffusion1 + elif cross_attention_dim == 1024: + return BaseModelType.StableDiffusion2 + elif cross_attention_dim == 2048: + return BaseModelType.StableDiffusionXL + else: + raise InvalidModelConfigException( + f"IP-Adapter had unexpected cross-attention dimension: {cross_attention_dim}." + ) + raise InvalidModelConfigException(f"{self.model_path}: Unable to determine base type") class CLIPVisionCheckpointProbe(CheckpointProbeBase): @@ -768,7 +786,7 @@ def get_base_type(self) -> BaseModelType: ) -############## register probe classes ###### +# Register probe classes ModelProbe.register_probe("diffusers", ModelType.Main, PipelineFolderProbe) ModelProbe.register_probe("diffusers", ModelType.VAE, VaeFolderProbe) ModelProbe.register_probe("diffusers", ModelType.LoRA, LoRAFolderProbe) diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 1601169e03c..5872d22dfed 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -217,6 +217,7 @@ "saveControlImage": "Save Control Image", "scribble": "scribble", "selectModel": "Select a model", + "selectCLIPVisionModel": "Select a CLIP Vision model", "setControlImageDimensions": "Set Control Image Dimensions To W/H", "showAdvanced": "Show Advanced", "small": "Small", @@ -655,6 +656,7 @@ "install": "Install", "installAll": "Install All", "installRepo": "Install Repo", + "ipAdapters": "IP Adapters", "load": "Load", "localOnly": "local only", "manual": "Manual", diff --git a/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterModel.tsx b/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterModel.tsx index 25d327e54e3..91f88223523 100644 --- a/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterModel.tsx +++ b/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterModel.tsx @@ -1,12 +1,18 @@ -import { Combobox, FormControl, Tooltip } from '@invoke-ai/ui-library'; +import type { ComboboxOnChange, ComboboxOption } from '@invoke-ai/ui-library'; +import { Combobox, Flex, FormControl, Tooltip } from '@invoke-ai/ui-library'; import { createMemoizedSelector } 
from 'app/store/createMemoizedSelector'; import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; import { useGroupedModelCombobox } from 'common/hooks/useGroupedModelCombobox'; +import { useControlAdapterCLIPVisionModel } from 'features/controlAdapters/hooks/useControlAdapterCLIPVisionModel'; import { useControlAdapterIsEnabled } from 'features/controlAdapters/hooks/useControlAdapterIsEnabled'; import { useControlAdapterModel } from 'features/controlAdapters/hooks/useControlAdapterModel'; import { useControlAdapterModels } from 'features/controlAdapters/hooks/useControlAdapterModels'; import { useControlAdapterType } from 'features/controlAdapters/hooks/useControlAdapterType'; -import { controlAdapterModelChanged } from 'features/controlAdapters/store/controlAdaptersSlice'; +import { + controlAdapterCLIPVisionModelChanged, + controlAdapterModelChanged, +} from 'features/controlAdapters/store/controlAdaptersSlice'; +import type { CLIPVisionModel } from 'features/controlAdapters/store/types'; import { selectGenerationSlice } from 'features/parameters/store/generationSlice'; import { memo, useCallback, useMemo } from 'react'; import { useTranslation } from 'react-i18next'; @@ -29,6 +35,7 @@ const ParamControlAdapterModel = ({ id }: ParamControlAdapterModelProps) => { const { modelConfig } = useControlAdapterModel(id); const dispatch = useAppDispatch(); const currentBaseModel = useAppSelector((s) => s.generation.model?.base); + const currentCLIPVisionModel = useControlAdapterCLIPVisionModel(id); const mainModel = useAppSelector(selectMainModel); const { t } = useTranslation(); @@ -49,6 +56,16 @@ const ParamControlAdapterModel = ({ id }: ParamControlAdapterModelProps) => { [dispatch, id] ); + const onCLIPVisionModelChange = useCallback( + (v) => { + if (!v?.value) { + return; + } + dispatch(controlAdapterCLIPVisionModelChanged({ id, clipVisionModel: v.value as CLIPVisionModel })); + }, + [dispatch, id] + ); + const selectedModel = useMemo( () => (modelConfig && controlAdapterType ? 
{ ...modelConfig, model_type: controlAdapterType } : null), [controlAdapterType, modelConfig] @@ -71,18 +88,51 @@ const ParamControlAdapterModel = ({ id }: ParamControlAdapterModelProps) => { isLoading, }); + const clipVisionOptions = useMemo( + () => [ + { label: 'ViT-H', value: 'ViT-H' }, + { label: 'ViT-G', value: 'ViT-G' }, + ], + [] + ); + + const clipVisionModel = useMemo( + () => clipVisionOptions.find((o) => o.value === currentCLIPVisionModel), + [clipVisionOptions, currentCLIPVisionModel] + ); + return ( - - - - - + + + + + + + {modelConfig?.type === 'ip_adapter' && modelConfig.format === 'checkpoint' && ( + + + + )} + ); }; diff --git a/invokeai/frontend/web/src/features/controlAdapters/hooks/useControlAdapterCLIPVisionModel.ts b/invokeai/frontend/web/src/features/controlAdapters/hooks/useControlAdapterCLIPVisionModel.ts new file mode 100644 index 00000000000..249d2022fe8 --- /dev/null +++ b/invokeai/frontend/web/src/features/controlAdapters/hooks/useControlAdapterCLIPVisionModel.ts @@ -0,0 +1,24 @@ +import { createMemoizedSelector } from 'app/store/createMemoizedSelector'; +import { useAppSelector } from 'app/store/storeHooks'; +import { + selectControlAdapterById, + selectControlAdaptersSlice, +} from 'features/controlAdapters/store/controlAdaptersSlice'; +import { useMemo } from 'react'; + +export const useControlAdapterCLIPVisionModel = (id: string) => { + const selector = useMemo( + () => + createMemoizedSelector(selectControlAdaptersSlice, (controlAdapters) => { + const cn = selectControlAdapterById(controlAdapters, id); + if (cn && cn?.type === 'ip_adapter') { + return cn.clipVisionModel; + } + }), + [id] + ); + + const clipVisionModel = useAppSelector(selector); + + return clipVisionModel; +}; diff --git a/invokeai/frontend/web/src/features/controlAdapters/store/controlAdaptersSlice.ts b/invokeai/frontend/web/src/features/controlAdapters/store/controlAdaptersSlice.ts index f4edca41bb5..100bb3f6ad6 100644 --- a/invokeai/frontend/web/src/features/controlAdapters/store/controlAdaptersSlice.ts +++ b/invokeai/frontend/web/src/features/controlAdapters/store/controlAdaptersSlice.ts @@ -14,6 +14,7 @@ import { v4 as uuidv4 } from 'uuid'; import { controlAdapterImageProcessed } from './actions'; import { CONTROLNET_PROCESSORS } from './constants'; import type { + CLIPVisionModel, ControlAdapterConfig, ControlAdapterProcessorType, ControlAdaptersState, @@ -244,6 +245,13 @@ export const controlAdaptersSlice = createSlice({ } caAdapter.updateOne(state, { id, changes: { controlMode } }); }, + controlAdapterCLIPVisionModelChanged: ( + state, + action: PayloadAction<{ id: string; clipVisionModel: CLIPVisionModel }> + ) => { + const { id, clipVisionModel } = action.payload; + caAdapter.updateOne(state, { id, changes: { clipVisionModel } }); + }, controlAdapterResizeModeChanged: ( state, action: PayloadAction<{ @@ -381,6 +389,7 @@ export const { controlAdapterProcessedImageChanged, controlAdapterIsEnabledChanged, controlAdapterModelChanged, + controlAdapterCLIPVisionModelChanged, controlAdapterWeightChanged, controlAdapterBeginStepPctChanged, controlAdapterEndStepPctChanged, diff --git a/invokeai/frontend/web/src/features/controlAdapters/store/types.ts b/invokeai/frontend/web/src/features/controlAdapters/store/types.ts index 93d4915cdf1..329c3187598 100644 --- a/invokeai/frontend/web/src/features/controlAdapters/store/types.ts +++ b/invokeai/frontend/web/src/features/controlAdapters/store/types.ts @@ -243,12 +243,15 @@ export type T2IAdapterConfig = { shouldAutoConfig: boolean; }; +export 
type CLIPVisionModel = 'ViT-H' | 'ViT-G'; + export type IPAdapterConfig = { type: 'ip_adapter'; id: string; isEnabled: boolean; controlImage: string | null; model: ParameterIPAdapterModel | null; + clipVisionModel: CLIPVisionModel; weight: number; beginStepPct: number; endStepPct: number; diff --git a/invokeai/frontend/web/src/features/controlAdapters/util/buildControlAdapter.ts b/invokeai/frontend/web/src/features/controlAdapters/util/buildControlAdapter.ts index d4796572d47..dc893ceb1c5 100644 --- a/invokeai/frontend/web/src/features/controlAdapters/util/buildControlAdapter.ts +++ b/invokeai/frontend/web/src/features/controlAdapters/util/buildControlAdapter.ts @@ -46,6 +46,7 @@ export const initialIPAdapter: Omit = { isEnabled: true, controlImage: null, model: null, + clipVisionModel: 'ViT-H', weight: 1, beginStepPct: 0, endStepPct: 1, diff --git a/invokeai/frontend/web/src/features/metadata/util/parsers.ts b/invokeai/frontend/web/src/features/metadata/util/parsers.ts index c7c9616bd02..635a63a8dec 100644 --- a/invokeai/frontend/web/src/features/metadata/util/parsers.ts +++ b/invokeai/frontend/web/src/features/metadata/util/parsers.ts @@ -372,6 +372,7 @@ const parseIPAdapter: MetadataParseFunc = async (metada type: 'ip_adapter', isEnabled: true, model: zModelIdentifierField.parse(ipAdapterModel), + clipVisionModel: 'ViT-H', controlImage: image?.image_name ?? null, weight: weight ?? initialIPAdapter.weight, beginStepPct: begin_step_percent ?? initialIPAdapter.beginStepPct, diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx index adb123f24db..0618af5dd04 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx @@ -53,7 +53,7 @@ export const ModelView = () => { )} - {data.type === 'ip_adapter' && ( + {data.type === 'ip_adapter' && data.format === 'invokeai' && ( diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts index 2298e84d43e..ad563de4684 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts @@ -48,7 +48,7 @@ export const addIPAdapterToLinearGraph = async ( if (!ipAdapter.model) { return; } - const { id, weight, model, beginStepPct, endStepPct, controlImage } = ipAdapter; + const { id, weight, model, clipVisionModel, beginStepPct, endStepPct, controlImage } = ipAdapter; assert(controlImage, 'IP Adapter image is required'); @@ -58,6 +58,7 @@ export const addIPAdapterToLinearGraph = async ( is_intermediate: true, weight: weight, ip_adapter_model: model, + clip_vision_model: clipVisionModel, begin_step_percent: beginStepPct, end_step_percent: endStepPct, image: { @@ -83,7 +84,7 @@ export const addIPAdapterToLinearGraph = async ( }; const buildIPAdapterMetadata = (ipAdapter: IPAdapterConfig): S['IPAdapterMetadataField'] => { - const { controlImage, beginStepPct, endStepPct, model, weight } = ipAdapter; + const { controlImage, beginStepPct, endStepPct, model, clipVisionModel, weight } = ipAdapter; assert(model, 'IP Adapter model is required'); @@ -99,6 +100,7 @@ const buildIPAdapterMetadata = (ipAdapter: IPAdapterConfig): S['IPAdapterMetadat return { ip_adapter_model: model, + 
clip_vision_model: clipVisionModel, weight, begin_step_percent: beginStepPct, end_step_percent: endStepPct, diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 2de0f89d138..6e3ddf678b5 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -4112,7 +4112,7 @@ export type components = { * @description The nodes in this graph */ nodes: { - [key: string]: components["schemas"]["ControlNetInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["StringReplaceInvocation"] | 
components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["VAELoaderInvocation"] | 
components["schemas"]["DepthAnythingImageProcessorInvocation"]; + [key: string]: components["schemas"]["StringInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["IterateInvocation"] | 
components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["LoRALoaderInvocation"]; }; /** * Edges @@ -4149,7 +4149,7 @@ export type components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["FaceMaskOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["IPAdapterOutput"] | 
components["schemas"]["VAEOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["String2Output"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LatentsCollectionOutput"]; + [key: string]: components["schemas"]["CLIPOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["String2Output"] | components["schemas"]["IntegerOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["CollectInvocationOutput"] | 
components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["CalculateImageTilesOutput"]; }; /** * Errors @@ -4310,10 +4310,10 @@ export type components = { is_diffusers: boolean; }; /** - * IPAdapterConfig - * @description Model config for IP Adaptor format models. + * IPAdapterCheckpointConfig + * @description Model config for IP Adapter checkpoint format models. */ - IPAdapterConfig: { + IPAdapterCheckpointConfig: { /** * Key * @description A unique key for this model. @@ -4364,13 +4364,11 @@ export type components = { * @constant */ type: "ip_adapter"; - /** Image Encoder Model Id */ - image_encoder_model_id: string; /** * Format * @constant */ - format: "invokeai"; + format: "checkpoint"; }; /** IPAdapterField */ IPAdapterField: { @@ -4434,6 +4432,13 @@ export type components = { * @description The IP-Adapter model. */ ip_adapter_model: components["schemas"]["ModelIdentifierField"]; + /** + * Clip Vision Model + * @description CLIP Vision model to use. Overrides model settings. Mandatory for checkpoint models. + * @default auto + * @enum {string} + */ + clip_vision_model?: "auto" | "ViT-H" | "ViT-G"; /** * Weight * @description The weight given to the IP-Adapter @@ -4459,6 +4464,69 @@ export type components = { */ type: "ip_adapter"; }; + /** + * IPAdapterInvokeAIConfig + * @description Model config for IP Adapter diffusers format models. + */ + IPAdapterInvokeAIConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default ip_adapter + * @constant + */ + type: "ip_adapter"; + /** Image Encoder Model Id */ + image_encoder_model_id: string; + /** + * Format + * @constant + */ + format: "invokeai"; + }; /** * IPAdapterMetadataField * @description IP Adapter Field, minus the CLIP Vision Encoder model @@ -4468,6 +4536,12 @@ export type components = { image: components["schemas"]["ImageField"]; /** @description The IP-Adapter model. */ ip_adapter_model: components["schemas"]["ModelIdentifierField"]; + /** + * Clip Vision Model + * @description The CLIP Vision model + * @enum {string} + */ + clip_vision_model: "ViT-H" | "ViT-G"; /** * Weight * @description The weight given to the IP-Adapter @@ -7471,7 +7545,7 @@ export type components = { * Config Out * @description After successful installation, this will hold the configuration object. 
*/ - config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]) | null; + config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]) | null; /** * Inplace * @description Leave model in its current location; otherwise install under models directory @@ -7626,7 +7700,7 @@ export type components = { */ ModelsList: { /** Models */ - models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"])[]; + models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"])[]; }; /** * Multiply Integers @@ -11155,7 +11229,7 @@ export type operations = { /** @description Successful Response */ 200: { content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | 
components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; }; }; /** @description Validation Error */ @@ -11181,7 +11255,7 @@ export type operations = { /** @description The model configuration was retrieved successfully */ 200: { content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; }; }; /** @description Bad request */ @@ -11263,7 +11337,7 @@ export type operations = { /** @description The model was updated successfully */ 200: { content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] 
| components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; }; }; /** @description Bad request */ @@ -11637,7 +11711,7 @@ export type operations = { /** @description Model converted successfully */ 200: { content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; }; }; /** @description Bad request */ diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts index 0bc141960d4..39d5890e51a 100644 --- a/invokeai/frontend/web/src/services/api/types.ts +++ b/invokeai/frontend/web/src/services/api/types.ts @@ -46,7 +46,7 @@ export type LoRAModelConfig = S['LoRADiffusersConfig'] | S['LoRALyCORISConfig']; // TODO(MM2): Can we rename this from Vae -> VAE export type VAEModelConfig = S['VAECheckpointConfig'] | S['VAEDiffusersConfig']; export type ControlNetModelConfig = S['ControlNetDiffusersConfig'] | S['ControlNetCheckpointConfig']; -export type IPAdapterModelConfig = S['IPAdapterConfig']; +export type IPAdapterModelConfig = S['IPAdapterInvokeAIConfig'] | S['IPAdapterCheckpointConfig']; export type T2IAdapterModelConfig = S['T2IAdapterConfig']; type TextualInversionModelConfig = S['TextualInversionFileConfig'] | S['TextualInversionFolderConfig']; type DiffusersModelConfig = S['MainDiffusersConfig'];