From ad971af8c7dc2c006b58853207b36942569a10ee Mon Sep 17 00:00:00 2001
From: zxfan-cpu
Date: Tue, 8 Apr 2025 11:48:47 +0800
Subject: [PATCH] [Bugfix] fix use-ep bug to enable ep by dp/tp size > 1
 (#16161)

---
 vllm/model_executor/layers/fused_moe/layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index 0e35d8a80988c..80ac5f42dfb89 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -437,7 +437,7 @@ class FusedMoE(torch.nn.Module):
         # Use expert parallelism instead of tensor parallelism?
         vllm_config = get_current_vllm_config()
         use_ep = (vllm_config.parallel_config.enable_expert_parallel
-                  and self.tp_size > 1)
+                  and self.tp_size * self.dp_size > 1)
 
         # For smuggling this layer into the fused moe custom op
         self.use_direct_call = self.dp_size == 1
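
The one-line change above matters for data-parallel-only deployments. Below is a
minimal standalone sketch (not the vLLM source; the function names and literal
values are hypothetical, chosen only for illustration) showing why the old
condition never activated expert parallelism when tp_size == 1 and dp_size > 1:

    def use_ep_old(enable_expert_parallel: bool, tp_size: int, dp_size: int) -> bool:
        # Old condition: EP only activates when tensor parallelism is active,
        # so a DP-only setup (tp_size == 1, dp_size > 1) silently keeps
        # tensor-parallel expert sharding even though EP was requested.
        return enable_expert_parallel and tp_size > 1

    def use_ep_new(enable_expert_parallel: bool, tp_size: int, dp_size: int) -> bool:
        # Fixed condition: any combination of tensor and data parallelism
        # whose product exceeds 1 can back the expert-parallel group.
        return enable_expert_parallel and tp_size * dp_size > 1

    if __name__ == "__main__":
        # DP-only deployment with EP requested: tp_size=1, dp_size=4.
        print(use_ep_old(True, 1, 4))  # False -- the bug: EP never turns on
        print(use_ep_new(True, 1, 4))  # True  -- EP activates as requested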