diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index 499c4573cacd5..9b117f3b5d418 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -1137,7 +1137,7 @@ class FusedMoE(CustomOp):
         )
         self.local_num_experts = local_num_experts
         self.register_buffer("expert_map", expert_map)
-        logger.debug_once(
+        logger.info_once(
             "[EP Rank %s/%s] Expert parallelism is enabled. Expert "
             "placement strategy: %s. Local/global"
             " number of experts: %s/%s. Experts local to global index map:"
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 23a3fd50d2312..fc64a7a93aa58 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -725,6 +725,7 @@ class EngineCoreProc(EngineCore):
         )
 
         # Receive initialization message.
+        logger.debug("Waiting for init message from front-end.")
         if not handshake_socket.poll(timeout=HANDSHAKE_TIMEOUT_MINS * 60_000):
             raise RuntimeError(
                 "Did not receive response from front-end "