[V1][Minor] Minor code cleanup for scheduling metrics (#14800)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Authored by Woosuk Kwon on 2025-03-14 01:21:28 -07:00; committed by GitHub
parent 989ecd2007
commit c77620d22d
2 changed files with 23 additions and 40 deletions
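
In short, the cleanup removes the three near-identical Scheduler helpers (request_queued, request_scheduled, request_preempted) and the per-event Request methods (queued, scheduled), routing everything through a single Request.record_event, with the log_stats check moved to the call sites. Below is a minimal, self-contained sketch of the resulting pattern; the classes are simplified stand-ins for illustration only, not the actual vllm.v1 implementations.

    import enum
    import time
    from dataclasses import dataclass
    from typing import Optional


    class EngineCoreEventType(enum.IntEnum):
        # Stand-in for vllm.v1.engine.EngineCoreEventType.
        QUEUED = 1
        SCHEDULED = 2
        PREEMPTED = 3


    @dataclass
    class EngineCoreEvent:
        # Stand-in for vllm.v1.engine.EngineCoreEvent.
        type: EngineCoreEventType
        timestamp: float

        @classmethod
        def new_event(cls,
                      event_type: EngineCoreEventType,
                      timestamp: Optional[float] = None) -> "EngineCoreEvent":
            # Default to "now" when the caller does not supply a timestamp.
            return cls(event_type,
                       time.monotonic() if timestamp is None else timestamp)


    class Request:
        # Stand-in for vllm.v1.request.Request, reduced to the event API.

        def __init__(self) -> None:
            self.events: list[EngineCoreEvent] = []

        def record_event(self,
                         event_type: EngineCoreEventType,
                         timestamp: Optional[float] = None) -> None:
            # One generic method replaces the per-event helpers.
            self.events.append(EngineCoreEvent.new_event(event_type, timestamp))

        def take_events(self) -> Optional[list[EngineCoreEvent]]:
            # Return the buffered events and clear the buffer.
            if not self.events:
                return None
            events, self.events = self.events, []
            return events

One generic method replaces several single-purpose ones, so adding a new event type no longer requires a new helper on either class.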

File 1 of 2 (class Scheduler):

@@ -15,8 +15,8 @@ from vllm.v1.core.encoder_cache_manager import (EncoderCacheManager,
 from vllm.v1.core.kv_cache_manager import KVCacheManager
 from vllm.v1.core.scheduler_output import (CachedRequestData, NewRequestData,
                                            SchedulerOutput)
-from vllm.v1.engine import (EngineCoreEvent, EngineCoreEventType,
-                            EngineCoreOutput, EngineCoreOutputs)
+from vllm.v1.engine import (EngineCoreEventType, EngineCoreOutput,
+                            EngineCoreOutputs)
 from vllm.v1.metrics.stats import SchedulerStats
 from vllm.v1.outputs import ModelRunnerOutput
 from vllm.v1.request import Request, RequestStatus
@@ -178,7 +178,9 @@ class Scheduler:
                     self.kv_cache_manager.free(preempted_req)
                     preempted_req.status = RequestStatus.PREEMPTED
                     preempted_req.num_computed_tokens = 0
-                    self.request_preempted(preempted_req, scheduled_timestamp)
+                    if self.log_stats:
+                        preempted_req.record_event(
+                            EngineCoreEventType.PREEMPTED, scheduled_timestamp)
 
                     self.waiting.appendleft(preempted_req)
                     preempted_reqs.append(preempted_req)
@@ -320,7 +322,9 @@ class Scheduler:
                 req_index += 1
                 self.running.append(request)
                 self.scheduled_req_ids.add(request.request_id)
-                self.request_scheduled(request, scheduled_timestamp)
+                if self.log_stats:
+                    request.record_event(EngineCoreEventType.SCHEDULED,
+                                         scheduled_timestamp)
                 if request.status == RequestStatus.WAITING:
                     scheduled_new_reqs.append(request)
                 elif request.status == RequestStatus.PREEMPTED:
@@ -666,7 +670,8 @@ class Scheduler:
     def add_request(self, request: Request) -> None:
         self.waiting.append(request)
         self.requests[request.request_id] = request
-        self.request_queued(request)
+        if self.log_stats:
+            request.record_event(EngineCoreEventType.QUEUED)
 
     def finish_requests(
         self,
@@ -728,26 +733,6 @@ class Scheduler:
     def reset_prefix_cache(self) -> bool:
         return self.kv_cache_manager.reset_prefix_cache()
 
-    def request_queued(self, request: Request):
-        if not self.log_stats:
-            return
-        request.events.append(
-            EngineCoreEvent.new_event(EngineCoreEventType.QUEUED))
-
-    def request_scheduled(self, request: Request, timestamp: float):
-        if not self.log_stats:
-            return
-        request.events.append(
-            EngineCoreEvent.new_event(EngineCoreEventType.SCHEDULED,
-                                      timestamp))
-
-    def request_preempted(self, request: Request, timestamp: float):
-        if not self.log_stats:
-            return
-        request.events.append(
-            EngineCoreEvent.new_event(EngineCoreEventType.PREEMPTED,
-                                      timestamp))
-
     def make_stats(self) -> Optional[SchedulerStats]:
         if not self.log_stats:
             return None

File 2 of 2 (class Request):

@@ -88,21 +88,6 @@ class Request:
                 sampling_params=request.sampling_params),
         )
 
-    def queued(self, timestamp: Optional[float] = None) -> None:
-        self.events.append(
-            EngineCoreEvent.new_event(EngineCoreEventType.QUEUED, timestamp))
-
-    def scheduled(self, timestamp: Optional[float] = None) -> None:
-        self.events.append(
-            EngineCoreEvent.new_event(EngineCoreEventType.SCHEDULED,
-                                      timestamp))
-
-    def take_events(self) -> Optional[list[EngineCoreEvent]]:
-        if not self.events:
-            return None
-        events, self.events = self.events, []
-        return events
-
     def append_output_token_ids(
         self,
         token_ids: Union[int, list[int]],
@@ -146,6 +131,19 @@ class Request:
     def use_structured_output(self) -> bool:
         return self.sampling_params.guided_decoding is not None
 
+    def record_event(
+        self,
+        event_type: EngineCoreEventType,
+        timestamp: Optional[float] = None,
+    ) -> None:
+        self.events.append(EngineCoreEvent.new_event(event_type, timestamp))
+
+    def take_events(self) -> Optional[list[EngineCoreEvent]]:
+        if not self.events:
+            return None
+        events, self.events = self.events, []
+        return events
+
 
 class RequestStatus(enum.IntEnum):
     """Status of a request."""