diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 3ee804f10c17..be94598e69ec 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -1050,6 +1050,7 @@ class DPEngineCoreProc(EngineCoreProc):
             self._maybe_publish_request_counts()

             local_unfinished_reqs = self.scheduler.has_unfinished_requests()
+            logger.debug("local_unfinished_reqs=%s", local_unfinished_reqs)
             if not executed:
                 if not local_unfinished_reqs and not self.engines_running:
                     # All engines are idle.
@@ -1057,6 +1058,7 @@ class DPEngineCoreProc(EngineCoreProc):

                 # We are in a running state and so must execute a dummy pass
                 # if the model didn't execute any ready requests.
+                logger.debug("Executing dummy batch (no ready requests)")
                 self.execute_dummy_batch()

         # 3) All-reduce operation to determine global unfinished reqs.
diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index a84b0e55105b..c74254020eba 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -1077,7 +1077,7 @@ class DPAsyncMPClient(AsyncMPClient):
                 if counts is not None:
                     sliced_counts = counts[count_slice]
                     self.lb_engines = sliced_counts
-                    logger.debug("Received counts: %s (%s)", sliced_counts,
+                    logger.debug("Received counts: %s (%s)", counts,
                                  count_slice)

         resources.stats_update_task = asyncio.create_task(