diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index 0e35d8a80988c..80ac5f42dfb89 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -437,7 +437,7 @@ class FusedMoE(torch.nn.Module):
         # Use expert parallelism instead of tensor parallelism?
         vllm_config = get_current_vllm_config()
         use_ep = (vllm_config.parallel_config.enable_expert_parallel
-                  and self.tp_size > 1)
+                  and self.tp_size * self.dp_size > 1)
 
         # For smuggling this layer into the fused moe custom op
         self.use_direct_call = self.dp_size == 1
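
For context, a minimal sketch of the condition change (the `tp_size`/`dp_size` names come from the diff; the concrete values below are illustrative assumptions, not a real deployment): with data parallelism the total number of ranks available to shard experts across is the TP size times the DP size, so checking `tp_size > 1` alone left expert parallelism disabled whenever TP was 1 but DP was greater than 1.

# Illustrative sketch only; the example TP/DP values are assumptions.
def use_ep_old(enable_expert_parallel: bool, tp_size: int) -> bool:
    # Pre-fix: only the tensor-parallel world size was considered.
    return enable_expert_parallel and tp_size > 1

def use_ep_new(enable_expert_parallel: bool, tp_size: int, dp_size: int) -> bool:
    # Post-fix: the combined TP x DP world size is considered.
    return enable_expert_parallel and tp_size * dp_size > 1

# With TP=1 but DP=2, the old check wrongly disabled expert
# parallelism even though two ranks were available; the new check
# enables it.
assert use_ep_old(True, tp_size=1) is False
assert use_ep_new(True, tp_size=1, dp_size=2) is True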