Support moe_wna16 as well

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
mgoin 2025-02-12 19:18:29 +00:00
parent b8510f1081
commit 243408b6b4

View File

@ -990,7 +990,7 @@ class ModelConfig:
return False
         if self.quantization is not None and self.quantization not in [\
-                "fp8", "compressed-tensors", "awq_marlin"]:
+                "fp8", "compressed-tensors", "awq_marlin", "moe_wna16"]:
logger.warning(
"MLA is not supported with %s quantization. "
"Disabling MLA.", self.quantization)