From 18e505930d789a2ad57c8a048ff7d84c025530bd Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Tue, 25 Feb 2025 01:10:31 -0500 Subject: [PATCH] [Bugfix] Support MLA for CompressedTensorsWNA16 (#13725) Signed-off-by: mgoin --- vllm/attention/backends/mla/common.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vllm/attention/backends/mla/common.py b/vllm/attention/backends/mla/common.py index f47ea3684e03c..4dd562be38381 100644 --- a/vllm/attention/backends/mla/common.py +++ b/vllm/attention/backends/mla/common.py @@ -1130,13 +1130,13 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]): ) def get_layer_weight(layer): - if hasattr(layer, "weight"): - return layer.weight - elif hasattr(layer, "qweight"): - return layer.qweight - else: - raise AttributeError( - f"Layer '{layer}' has neither weight nor qweight") + WEIGHT_NAMES = ("weight", "qweight", "weight_packed") + for attr in WEIGHT_NAMES: + if hasattr(layer, attr): + return getattr(layer, attr) + raise AttributeError( + f"Layer '{layer}' has no recognized weight attribute:" + f" {WEIGHT_NAMES}.") def get_and_maybe_dequant_weights(layer: LinearBase): if not isinstance(layer.quant_method, UnquantizedLinearMethod):