From deff0ac83550fc2177ae164cff557a57539ac994 Mon Sep 17 00:00:00 2001
From: Rattus
Date: Sat, 6 Dec 2025 17:14:30 +1000
Subject: [PATCH] Fix on-load VRAM OOM

Slow down the CPU on model load so it does not run ahead of the GPU.
This fixes a VRAM OOM on Flux 2 load.

I went to debug this with the memory trace pickles, which need
--disable-cuda-malloc, which made the bug go away. So I tried this
synchronize and it worked.

This has some very complex interactions with cuda malloc async and I
don't have a solid theory on this one yet. Still debugging, but this
gets us over the OOM for the moment.
---
 comfy/model_patcher.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 215784874..9b76b87b6 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -761,6 +761,8 @@ class ModelPatcher:
                     key = "{}.{}".format(n, param)
                     self.unpin_weight(key)
                     self.patch_weight_to_device(key, device_to=device_to)
+                if comfy.model_management.is_device_cuda(device_to):
+                    torch.cuda.synchronize()
 
                 logging.debug("lowvram: loaded module regularly {} {}".format(n, m))
                 m.comfy_patched_weights = True
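
Additional context (not part of the patch): a minimal, self-contained sketch
of the pattern the fix relies on, assuming a PyTorch build with CUDA. If the
host queues non_blocking host-to-device copies for many modules before any of
them finish, peak VRAM can spike well above what is live on the GPU at any
one time; calling torch.cuda.synchronize() after each module's copies blocks
the host so it cannot run ahead. The function and tensor names below are
hypothetical and only for illustration.

    import torch

    def load_module_params_throttled(params, device):
        # Queue asynchronous host-to-device copies for one module's parameters.
        moved = {}
        for name, cpu_tensor in params.items():
            if device.type == "cuda":
                # Pinned memory is what lets non_blocking copies actually overlap
                # with host work, which is also what lets the host race ahead.
                cpu_tensor = cpu_tensor.pin_memory()
            moved[name] = cpu_tensor.to(device, non_blocking=True)
        if device.type == "cuda":
            # Block the host until the queued copies (and the allocations behind
            # them) have completed before moving on to the next module.
            torch.cuda.synchronize(device)
        return moved

    if __name__ == "__main__":
        dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        fake_params = {"weight": torch.randn(1024, 1024), "bias": torch.randn(1024)}
        loaded = load_module_params_throttled(fake_params, dev)
        print({k: v.device for k, v in loaded.items()})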