diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index ca636bf5a6f7d..2c8cf2b443ad0 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -422,9 +422,8 @@ class EngineCoreProc(EngineCore): addresses.frontend_stats_publish_address) # Only publish request queue stats to coordinator for "internal" # LB mode. - self.publish_dp_lb_stats = ( - self.has_coordinator - and not vllm_config.parallel_config.data_parallel_external_lb) + self.publish_dp_lb_stats = (self.has_coordinator and True) + # and not vllm_config.parallel_config.data_parallel_external_lb) self._init_data_parallel(vllm_config) diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py index cc52dc113d233..643997e34c8ae 100644 --- a/vllm/v1/engine/core_client.py +++ b/vllm/v1/engine/core_client.py @@ -903,8 +903,7 @@ class DPAsyncMPClient(AsyncMPClient): assert len(self.engine_ranks_managed) > 1 start_idx = self.engine_ranks_managed[0] - end_idx = self.engine_ranks_managed[-1] - logger.info(f"=============== {start_idx=}, {end_idx=}") + end_idx = self.engine_ranks_managed[-1] + 1 async def run_engine_stats_update_task(): with make_zmq_socket(self.ctx, self.stats_update_address, @@ -973,7 +972,7 @@ class DPAsyncMPClient(AsyncMPClient): # to get get the Core's managed by this client. # TODO(rob): update this so we only pull in # the counts that we are load balancing across. - logger.info(f"{counts=}") + # logger.info(f"{counts=}") self.lb_engines = counts[start_idx:end_idx] resources.stats_update_task = asyncio.create_task(