Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2026-03-16 11:47:09 +08:00)
Support moe_wna16 as well
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
b8510f1081
commit
243408b6b4
@@ -990,7 +990,7 @@ class ModelConfig:
 return False

 if self.quantization is not None and self.quantization not in [\
-"fp8", "compressed-tensors", "awq_marlin"]:
+"fp8", "compressed-tensors", "awq_marlin", "moe_wna16"]:
 logger.warning(
 "MLA is not supported with %s quantization. "
 "Disabling MLA.", self.quantization)
Loading…
x
Reference in New Issue
Block a user