Mirror of https://git.datalinker.icu/comfyanonymous/ComfyUI (synced 2025-12-09 22:14:34 +08:00)
quant ops: Dequantize weight in-place (#10935)
In flux2 these weights are huge (200 MB). Since plain_tensor is a throw-away deep copy, do the scale multiplication in-place to save VRAM.
parent f17251bec6
commit 3f382a4f98
@@ -425,7 +425,8 @@ class TensorCoreFP8Layout(QuantizedLayout):
     @staticmethod
     def dequantize(qdata, scale, orig_dtype, **kwargs):
         plain_tensor = torch.ops.aten._to_copy.default(qdata, dtype=orig_dtype)
-        return plain_tensor * scale
+        plain_tensor.mul_(scale)
+        return plain_tensor
 
     @classmethod
     def get_plain_tensors(cls, qtensor):
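For readers outside the ComfyUI codebase, here is a minimal standalone sketch of the peak-memory argument, using plain PyTorch with made-up shapes, dtypes, and function names (not the project's actual quant-ops API): the old path allocates a second full-size buffer for the result of plain_tensor * scale, while mul_ reuses the buffer that _to_copy just created.

import torch

def dequantize_out_of_place(qdata, scale, orig_dtype):
    # Upcast copy of the quantized weight: one full-size buffer in orig_dtype.
    plain_tensor = torch.ops.aten._to_copy.default(qdata, dtype=orig_dtype)
    # The out-of-place multiply allocates a second full-size buffer for the
    # result, so peak memory during dequantization is roughly 2x the weight.
    return plain_tensor * scale

def dequantize_in_place(qdata, scale, orig_dtype):
    # Same upcast copy; qdata itself is never modified, only its throw-away copy.
    plain_tensor = torch.ops.aten._to_copy.default(qdata, dtype=orig_dtype)
    # The in-place multiply reuses the buffer _to_copy just allocated,
    # keeping peak memory at roughly 1x the weight.
    plain_tensor.mul_(scale)
    return plain_tensor

if __name__ == "__main__":
    # Hypothetical shape, dtype, and scale, only to show both paths agree.
    qdata = torch.randint(-8, 8, (1024, 1024), dtype=torch.int8)
    scale = torch.tensor(0.02)
    a = dequantize_out_of_place(qdata, scale, torch.float32)
    b = dequantize_in_place(qdata, scale, torch.float32)
    assert torch.equal(a, b)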