[Bugfix] Support MLA for CompressedTensorsWNA16 (#13725)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin 2025-02-25 01:10:31 -05:00 committed by GitHub
parent 4a8cfc7551
commit 18e505930d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1130,13 +1130,13 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
)
def get_layer_weight(layer):
if hasattr(layer, "weight"):
return layer.weight
elif hasattr(layer, "qweight"):
return layer.qweight
else:
raise AttributeError(
f"Layer '{layer}' has neither weight nor qweight")
WEIGHT_NAMES = ("weight", "qweight", "weight_packed")
for attr in WEIGHT_NAMES:
if hasattr(layer, attr):
return getattr(layer, attr)
raise AttributeError(
f"Layer '{layer}' has no recognized weight attribute:"
f" {WEIGHT_NAMES}.")
def get_and_maybe_dequant_weights(layer: LinearBase):
if not isinstance(layer.quant_method, UnquantizedLinearMethod):