diff --git a/vllm/config.py b/vllm/config.py index 2a4aebb4a3927..29a5b20149534 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -990,7 +990,7 @@ class ModelConfig: return False if self.quantization is not None and self.quantization not in [\ - "fp8", "compressed-tensors", "awq_marlin"]: + "fp8", "compressed-tensors", "awq_marlin", "moe_wna16"]: logger.warning( "MLA is not supported with %s quantization. " "Disabling MLA.", self.quantization)