From 243408b6b495f5d39478bd6cfc4fb9f41f368e4f Mon Sep 17 00:00:00 2001 From: mgoin Date: Wed, 12 Feb 2025 19:18:29 +0000 Subject: [PATCH] Support moe_wna16 as well Signed-off-by: mgoin --- vllm/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/config.py b/vllm/config.py index 2a4aebb4a3927..29a5b20149534 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -990,7 +990,7 @@ class ModelConfig: return False if self.quantization is not None and self.quantization not in [\ - "fp8", "compressed-tensors", "awq_marlin"]: + "fp8", "compressed-tensors", "awq_marlin", "moe_wna16"]: logger.warning( "MLA is not supported with %s quantization. " "Disabling MLA.", self.quantization)