diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py index 3d1e72299b4b8..9b49952f37244 100644 --- a/vllm/model_executor/models/qwen3_moe.py +++ b/vllm/model_executor/models/qwen3_moe.py @@ -149,7 +149,7 @@ class Qwen3MoeSparseMoeBlock(nn.Module): self.gate = ReplicatedLinear(config.hidden_size, config.num_experts, bias=False, - quant_config=None, + quant_config=quant_config, prefix=f"{prefix}.gate") def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: