[Bugfix] Support MLA for CompressedTensorsWNA16 (#13725)

Signed-off-by: mgoin <mgoin64@gmail.com>
2026-07-29 00:07:53 +08:00 · 2025-02-25 01:10:31 -05:00 · 2025-02-25 01:10:31 -05:00 · 18e505930d
commit 18e505930d
parent 4a8cfc7551
1 changed file with 7 additions and 7 deletions
--- a/vllm/attention/backends/mla/common.py
+++ b/vllm/attention/backends/mla/common.py
@@ -1130,13 +1130,13 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
                )
        def get_layer_weight(layer):
-            if hasattr(layer, "weight"):
+            WEIGHT_NAMES = ("weight", "qweight", "weight_packed")
-                return layer.weight
+            for attr in WEIGHT_NAMES:
-            elif hasattr(layer, "qweight"):
+                if hasattr(layer, attr):
-                return layer.qweight
+                    return getattr(layer, attr)
-            else:
+            raise AttributeError(
-                raise AttributeError(
+                f"Layer '{layer}' has no recognized weight attribute:"
-                    f"Layer '{layer}' has neither weight nor qweight")
+                f" {WEIGHT_NAMES}.")
        def get_and_maybe_dequant_weights(layer: LinearBase):
            if not isinstance(layer.quant_method, UnquantizedLinearMethod):