[Scheduler] Simplify stop checking for pooling models (#30591)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill 2025-12-13 01:45:26 -08:00 committed by GitHub
parent b09806e28f
commit 1cec5b7ea9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 16 deletions

View File

@ -1117,6 +1117,7 @@ class Scheduler(SchedulerInterface):
stopped = False
new_logprobs = None
new_token_ids = generated_token_ids
pooler_output = pooler_outputs[req_index] if pooler_outputs else None
kv_transfer_params = None
status_before_stop = request.status
@ -1125,12 +1126,10 @@ class Scheduler(SchedulerInterface):
new_token_ids, stopped = self._update_request_with_output(
request, new_token_ids
)
# Stop checking for pooler models.
pooler_output = None
if pooler_outputs:
pooler_output = pooler_outputs[req_index]
stopped = check_stop(request, self.max_model_len, pooler_output)
elif request.pooling_params and pooler_output is not None:
# Pooling stops as soon as there is output.
request.status = RequestStatus.FINISHED_STOPPED
stopped = True
if stopped:
kv_transfer_params = self._free_request(request)

View File

@ -2,8 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import contextlib
import torch
from vllm.v1.request import Request, RequestStatus
@ -39,14 +37,8 @@ def remove_all(lst: list, items_to_remove: set) -> list:
return [item for item in lst if item not in items_to_remove]
def check_stop(
request: Request, max_model_len: int, pooler_output: torch.Tensor | None = None
) -> bool:
if request.pooling_params:
if pooler_output is not None:
request.status = RequestStatus.FINISHED_STOPPED
return True
return False
def check_stop(request: Request, max_model_len: int) -> bool:
assert not request.pooling_params
sampling_params = request.sampling_params
assert sampling_params is not None