Support MLA with moe_wna16 quantization as well

Signed-off-by: mgoin <mgoin64@gmail.com>
2026-06-03 05:37:53 +08:00 · 2025-02-12 19:18:29 +00:00 · 2025-02-12 19:18:29 +00:00 · 243408b6b4
commit 243408b6b4
parent b8510f1081
1 changed files with 1 additions and 1 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -990,7 +990,7 @@ class ModelConfig:
            return False
        if self.quantization is not None and self.quantization not in [\
-            "fp8", "compressed-tensors", "awq_marlin"]:
+            "fp8", "compressed-tensors", "awq_marlin", "moe_wna16"]:
            logger.warning(
                "MLA is not supported with %s quantization. "
                "Disabling MLA.", self.quantization)