From e484ecb9473d4f8c74eba9d7d1c807e5655d704c Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 12 Mar 2025 22:09:30 -0700 Subject: [PATCH] Add logging Signed-off-by: Woosuk Kwon --- vllm/v1/core/sched/logging.py | 18 ++++++++++++++++++ vllm/v1/core/sched/scheduler.py | 33 ++++++++------------------------- 2 files changed, 26 insertions(+), 25 deletions(-) create mode 100644 vllm/v1/core/sched/logging.py diff --git a/vllm/v1/core/sched/logging.py b/vllm/v1/core/sched/logging.py new file mode 100644 index 0000000000000..1c0960c962c22 --- /dev/null +++ b/vllm/v1/core/sched/logging.py @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: Apache-2.0 +from vllm.v1.engine import EngineCoreEvent, EngineCoreEventType +from vllm.v1.request import Request + + +def record_queued(request: Request) -> None: + request.events.append(EngineCoreEvent.new_event( + EngineCoreEventType.QUEUED)) + + +def record_scheduled(request: Request, timestamp: float) -> None: + request.events.append( + EngineCoreEvent.new_event(EngineCoreEventType.SCHEDULED, timestamp)) + + +def record_preempted(request: Request, timestamp: float) -> None: + request.events.append( + EngineCoreEvent.new_event(EngineCoreEventType.PREEMPTED, timestamp)) diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index 75e93eaf67f7a..f5caff10bb10d 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -15,10 +15,11 @@ from vllm.v1.core.encoder_cache_manager import (EncoderCacheManager, from vllm.v1.core.kv_cache_manager import KVCacheManager from vllm.v1.core.sched.common import CommonSchedulerStates from vllm.v1.core.sched.interface import SchedulerInterface +from vllm.v1.core.sched.logging import (record_preempted, record_queued, + record_scheduled) from vllm.v1.core.sched.output import NewRequestData, SchedulerOutput from vllm.v1.core.sched.utils import check_stop -from vllm.v1.engine import (EngineCoreEvent, EngineCoreEventType, - EngineCoreOutput, EngineCoreOutputs) 
+from vllm.v1.engine import EngineCoreOutput, EngineCoreOutputs from vllm.v1.metrics.stats import SchedulerStats from vllm.v1.outputs import ModelRunnerOutput from vllm.v1.request import Request, RequestStatus @@ -172,7 +173,8 @@ class Scheduler(SchedulerInterface): self.kv_cache_manager.free(preempted_req) preempted_req.status = RequestStatus.PREEMPTED preempted_req.num_computed_tokens = 0 - self.request_preempted(preempted_req, scheduled_timestamp) + if self.log_stats: + record_preempted(preempted_req, scheduled_timestamp) self.waiting.appendleft(preempted_req) preempted_reqs.append(preempted_req) @@ -314,7 +316,8 @@ class Scheduler(SchedulerInterface): req_index += 1 self.running.append(request) self.scheduled_req_ids.add(request.request_id) - self.request_scheduled(request, scheduled_timestamp) + if self.log_stats: + record_scheduled(request, scheduled_timestamp) if request.status == RequestStatus.WAITING: scheduled_new_reqs.append(request) elif request.status == RequestStatus.PREEMPTED: @@ -614,7 +617,8 @@ class Scheduler(SchedulerInterface): def add_request(self, request: Request) -> None: self.waiting.append(request) self.requests[request.request_id] = request - self.request_queued(request) + if self.log_stats: + record_queued(request) def finish_requests( self, @@ -668,26 +672,6 @@ class Scheduler(SchedulerInterface): def reset_prefix_cache(self) -> bool: return self.kv_cache_manager.reset_prefix_cache() - def request_queued(self, request: Request): - if not self.log_stats: - return - request.events.append( - EngineCoreEvent.new_event(EngineCoreEventType.QUEUED)) - - def request_scheduled(self, request: Request, timestamp: float): - if not self.log_stats: - return - request.events.append( - EngineCoreEvent.new_event(EngineCoreEventType.SCHEDULED, - timestamp)) - - def request_preempted(self, request: Request, timestamp: float): - if not self.log_stats: - return - request.events.append( - EngineCoreEvent.new_event(EngineCoreEventType.PREEMPTED, - timestamp)) - - def 
make_stats(self) -> Optional[SchedulerStats]: if not self.log_stats: return None