From 3f382a4f9884f7b672557028adb9bb85d075820d Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Fri, 28 Nov 2025 02:06:30 +1000
Subject: [PATCH] quant ops: Dequantize weight in-place (#10935)

In flux2 these weights are huge (200MB). As plain_tensor is a throw-away
deep copy, do this multiplication in-place to save VRAM.
---
 comfy/quant_ops.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index d2f3e7397..9b924560b 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -425,7 +425,8 @@ class TensorCoreFP8Layout(QuantizedLayout):
     @staticmethod
     def dequantize(qdata, scale, orig_dtype, **kwargs):
         plain_tensor = torch.ops.aten._to_copy.default(qdata, dtype=orig_dtype)
-        return plain_tensor * scale
+        plain_tensor.mul_(scale)
+        return plain_tensor
 
     @classmethod
     def get_plain_tensors(cls, qtensor):
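
For context, a minimal sketch (not part of the patch) of why the in-place
multiply lowers peak VRAM: the out-of-place plain_tensor * scale allocates a
second full-size buffer to hold the product on top of the dtype copy, while
mul_ writes the product back into the buffer the copy already allocated.
The function names below are illustrative, not from comfy/quant_ops.py.

    import torch

    def dequantize_out_of_place(qdata, scale, orig_dtype):
        plain = qdata.to(dtype=orig_dtype)  # throw-away copy of the quantized weight
        return plain * scale                # allocates a second buffer of the same size

    def dequantize_in_place(qdata, scale, orig_dtype):
        plain = qdata.to(dtype=orig_dtype)  # throw-away copy of the quantized weight
        plain.mul_(scale)                   # scale in the existing buffer; no extra allocation
        return plain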