mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-24 00:45:02 +08:00
[Scheduler] Simplify stop checking for pooling models (#30591)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
b09806e28f
commit
1cec5b7ea9
@@ -1117,6 +1117,7 @@ class Scheduler(SchedulerInterface):
|
||||
stopped = False
|
||||
new_logprobs = None
|
||||
new_token_ids = generated_token_ids
|
||||
pooler_output = pooler_outputs[req_index] if pooler_outputs else None
|
||||
kv_transfer_params = None
|
||||
status_before_stop = request.status
|
||||
|
||||
@@ -1125,12 +1126,10 @@ class Scheduler(SchedulerInterface):
|
||||
new_token_ids, stopped = self._update_request_with_output(
|
||||
request, new_token_ids
|
||||
)
|
||||
|
||||
# Stop checking for pooler models.
|
||||
pooler_output = None
|
||||
if pooler_outputs:
|
||||
pooler_output = pooler_outputs[req_index]
|
||||
stopped = check_stop(request, self.max_model_len, pooler_output)
|
||||
elif request.pooling_params and pooler_output is not None:
|
||||
# Pooling stops as soon as there is output.
|
||||
request.status = RequestStatus.FINISHED_STOPPED
|
||||
stopped = True
|
||||
|
||||
if stopped:
|
||||
kv_transfer_params = self._free_request(request)
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import contextlib
|
||||
|
||||
import torch
|
||||
|
||||
from vllm.v1.request import Request, RequestStatus
|
||||
|
||||
|
||||
@@ -39,14 +37,8 @@ def remove_all(lst: list, items_to_remove: set) -> list:
|
||||
return [item for item in lst if item not in items_to_remove]
|
||||
|
||||
|
||||
def check_stop(
|
||||
request: Request, max_model_len: int, pooler_output: torch.Tensor | None = None
|
||||
) -> bool:
|
||||
if request.pooling_params:
|
||||
if pooler_output is not None:
|
||||
request.status = RequestStatus.FINISHED_STOPPED
|
||||
return True
|
||||
return False
|
||||
def check_stop(request: Request, max_model_len: int) -> bool:
|
||||
assert not request.pooling_params
|
||||
|
||||
sampling_params = request.sampling_params
|
||||
assert sampling_params is not None
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user