diff --git a/vllm/attention/backends/mla/common.py b/vllm/attention/backends/mla/common.py
index f47ea3684e03c..4dd562be38381 100644
--- a/vllm/attention/backends/mla/common.py
+++ b/vllm/attention/backends/mla/common.py
@@ -1130,13 +1130,13 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
         )
 
         def get_layer_weight(layer):
-            if hasattr(layer, "weight"):
-                return layer.weight
-            elif hasattr(layer, "qweight"):
-                return layer.qweight
-            else:
-                raise AttributeError(
-                    f"Layer '{layer}' has neither weight nor qweight")
+            WEIGHT_NAMES = ("weight", "qweight", "weight_packed")
+            for attr in WEIGHT_NAMES:
+                if hasattr(layer, attr):
+                    return getattr(layer, attr)
+            raise AttributeError(
+                f"Layer '{layer}' has no recognized weight attribute:"
+                f" {WEIGHT_NAMES}.")
 
         def get_and_maybe_dequant_weights(layer: LinearBase):
             if not isinstance(layer.quant_method, UnquantizedLinearMethod):
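
The diff replaces the hard-coded `weight`/`qweight` branches with a tuple of candidate attribute names, so adding support for another weight storage convention (such as `weight_packed`, presumably used by packed quantized linear layers) only requires extending `WEIGHT_NAMES`. Below is a minimal, standalone sketch of that lookup pattern; `DummyPackedLayer` is a hypothetical stand-in, not a class from vllm, and is only there to show that a layer exposing `weight_packed` now resolves instead of raising `AttributeError`.

```python
import torch


def get_layer_weight(layer):
    # Try each known weight attribute name in order; the first match wins.
    WEIGHT_NAMES = ("weight", "qweight", "weight_packed")
    for attr in WEIGHT_NAMES:
        if hasattr(layer, attr):
            return getattr(layer, attr)
    raise AttributeError(
        f"Layer '{layer}' has no recognized weight attribute: {WEIGHT_NAMES}.")


class DummyPackedLayer:
    """Hypothetical layer that stores its weights in a packed attribute."""

    def __init__(self):
        self.weight_packed = torch.zeros(4, 4, dtype=torch.int32)


# With the old if/elif version this would raise AttributeError;
# with the tuple-based lookup it returns the packed tensor.
print(get_layer_weight(DummyPackedLayer()).shape)  # torch.Size([4, 4])
```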