From f43ff9fcbdd183b0ec5bd18524c10061b2619eec Mon Sep 17 00:00:00 2001
From: Rattus
Date: Sun, 7 Dec 2025 22:47:18 +1000
Subject: [PATCH] retune lowVramPatch VRAM accounting

In the lowvram case, the patch math is now done in the model dtype, in
the post de-quantization domain. Account for that.

The patching was also put back on the compute stream, taking it off the
peak, so relax the MATH_FACTOR to x2 and drop the worst-case assumption
of everything peaking at once.
---
 comfy/model_patcher.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 5b1ccb824..8b5edeb52 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -132,14 +132,14 @@ class LowVramPatch:
     def __call__(self, weight):
         return comfy.lora.calculate_weight(self.patches[self.key], weight, self.key, intermediate_dtype=weight.dtype)
 
-#The above patch logic may cast up the weight to fp32, and do math. Go with fp32 x 3
-LOWVRAM_PATCH_ESTIMATE_MATH_FACTOR = 3
+LOWVRAM_PATCH_ESTIMATE_MATH_FACTOR = 2
 
 def low_vram_patch_estimate_vram(model, key):
     weight, set_func, convert_func = get_key_weight(model, key)
     if weight is None:
         return 0
-    return weight.numel() * torch.float32.itemsize * LOWVRAM_PATCH_ESTIMATE_MATH_FACTOR
+    model_dtype = getattr(model, "manual_cast_dtype", torch.float32)
+    return weight.numel() * model_dtype.itemsize * LOWVRAM_PATCH_ESTIMATE_MATH_FACTOR
 
 def get_key_weight(model, key):
     set_func = None
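
For scale, a rough sketch of what the retuned estimate means for a single
weight tensor, assuming the model's manual_cast_dtype is torch.float16
(illustrative only, not part of the patch):

    import torch

    numel = 1_000_000  # elements in an example weight tensor

    # old estimate: fp32 math, worst-case factor of 3
    old_estimate = numel * torch.float32.itemsize * 3   # 12,000,000 bytes

    # new estimate: model dtype (fp16 here) math, off-peak factor of 2
    new_estimate = numel * torch.float16.itemsize * 2   # 4,000,000 bytes

For fp16/bf16 models the per-key patch overhead estimate drops to a third
of the old value; models that actually cast to fp32 only see the 3 -> 2
factor change.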