From 3f382a4f9884f7b672557028adb9bb85d075820d Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Fri, 28 Nov 2025 02:06:30 +1000
Subject: [PATCH] quant ops: Dequantize weight in-place (#10935)

In flux2 these weights are huge (200MB). As plain_tensor is a throw-away
deep copy, do this multiplication in-place to save VRAM.
---
 comfy/quant_ops.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index d2f3e7397..9b924560b 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -425,7 +425,8 @@ class TensorCoreFP8Layout(QuantizedLayout):
     @staticmethod
     def dequantize(qdata, scale, orig_dtype, **kwargs):
         plain_tensor = torch.ops.aten._to_copy.default(qdata, dtype=orig_dtype)
-        return plain_tensor * scale
+        plain_tensor.mul_(scale)
+        return plain_tensor
 
     @classmethod
     def get_plain_tensors(cls, qtensor):
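
For context, a minimal sketch (not part of the patch) of why the in-place
multiply lowers peak VRAM: the out-of-place plain_tensor * scale allocates a
second full-size buffer to hold the product on top of the dtype copy, while
mul_ writes the product back into the buffer the copy already allocated.
The function names below are illustrative, not from comfy/quant_ops.py.

    import torch

    def dequantize_out_of_place(qdata, scale, orig_dtype):
        plain = qdata.to(dtype=orig_dtype)  # throw-away copy of the quantized weight
        return plain * scale                # allocates a second buffer of the same size

    def dequantize_in_place(qdata, scale, orig_dtype):
        plain = qdata.to(dtype=orig_dtype)  # throw-away copy of the quantized weight
        plain.mul_(scale)                   # scale in the existing buffer; no extra allocation
        return plain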