diff --git a/vllm/model_executor/models/granitemoe.py b/vllm/model_executor/models/granitemoe.py index 51296ef0cc08..b518a0a6cbde 100644 --- a/vllm/model_executor/models/granitemoe.py +++ b/vllm/model_executor/models/granitemoe.py @@ -348,6 +348,7 @@ class GraniteMoeForCausalLM(nn.Module, SupportsLoRA, SupportsPP): self.config = config self.lora_config = lora_config + self.quant_config = quant_config # Required by MixtralForCausalLM self.model = GraniteMoeModel(vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model"))