diff --git a/comfy/model_management.py b/comfy/model_management.py
index d5d77d7d737..6f667dfc57e 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -314,6 +314,9 @@ def model(self):
     def model_memory(self):
         return self.model.model_size()
 
+    def model_loaded_memory(self):
+        return self.model.loaded_size()
+
     def model_offloaded_memory(self):
         return self.model.model_size() - self.model.loaded_size()
 
@@ -504,8 +507,10 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
         lowvram_model_memory = 0
         if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM) and not force_full_load:
             model_size = loaded_model.model_memory_required(torch_dev)
-            current_free_mem = get_free_memory(torch_dev)
-            lowvram_model_memory = max(1, (current_free_mem - minimum_memory_required), min(current_free_mem * 0.4, current_free_mem - minimum_inference_memory()))
+            loaded_memory = loaded_model.model_loaded_memory()
+            current_free_mem = get_free_memory(torch_dev) + loaded_memory
+            lowvram_model_memory = max(64 * 1024 * 1024, (current_free_mem - minimum_memory_required), min(current_free_mem * 0.4, current_free_mem - minimum_inference_memory()))
+            lowvram_model_memory = max(0.1, lowvram_model_memory - loaded_memory)
             if model_size <= lowvram_model_memory: #only switch to lowvram if really necessary
                 lowvram_model_memory = 0
 
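
For context, here is a minimal standalone sketch of the budget arithmetic the second hunk introduces (the function name `lowvram_budget`, its parameter names, and the example figures are illustrative only, not part of the patch): the free-memory reading is first restored to what it would be if this model were fully unloaded, a lowvram budget is chosen with a 64 MiB floor, and the bytes already resident are subtracted again so only the remainder needs to be loaded.

```python
def lowvram_budget(free_mem, loaded_mem, min_required, min_inference):
    """Illustrative re-statement of the diff's math, not the real API.

    free_mem      - VRAM the device currently reports as free (bytes)
    loaded_mem    - bytes of this model already resident on the device
    min_required  - memory the caller asked to keep free (minimum_memory_required)
    min_inference - stand-in for minimum_inference_memory()
    Returns the additional bytes of weights to load in lowvram mode.
    """
    # Count memory already occupied by this model as reclaimable free memory.
    total_free = free_mem + loaded_mem
    # Pick a budget, never planning for less than 64 MiB.
    budget = max(64 * 1024 * 1024,
                 total_free - min_required,
                 min(total_free * 0.4, total_free - min_inference))
    # Subtract what is already loaded; keep the result positive (0.1) so the
    # model is still treated as partially loaded rather than fully loadable.
    return max(0.1, budget - loaded_mem)

# Example: 8 GiB reported free, 2 GiB of the model already resident,
# roughly 1 GiB reserved for inference scratch space.
GiB = 1024 ** 3
print(lowvram_budget(8 * GiB, 2 * GiB, 1.2 * GiB, 1 * GiB))  # ~6.8 GiB
```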