diff --git a/vllm/v1/core/sched/utils.py b/vllm/v1/core/sched/utils.py
index 8af8a7d27806..82166dc97839 100644
--- a/vllm/v1/core/sched/utils.py
+++ b/vllm/v1/core/sched/utils.py
@@ -42,13 +42,6 @@ def remove_all(lst: list, items_to_remove: set) -> list:
 def check_stop(
     request: Request, max_model_len: int, pooler_output: torch.Tensor | None = None
 ) -> bool:
-    if (
-        request.num_tokens >= max_model_len
-        or request.num_output_tokens >= request.max_tokens
-    ):
-        request.status = RequestStatus.FINISHED_LENGTH_CAPPED
-        return True
-
     if request.pooling_params:
         if pooler_output is not None:
             request.status = RequestStatus.FINISHED_STOPPED
@@ -70,4 +63,10 @@ def check_stop(
         request.status = RequestStatus.FINISHED_STOPPED
         request.stop_reason = last_token_id
         return True
+    if (
+        request.num_tokens >= max_model_len
+        or request.num_output_tokens >= request.max_tokens
+    ):
+        request.status = RequestStatus.FINISHED_LENGTH_CAPPED
+        return True
     return False
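
The diff moves the length-cap check (`max_model_len` / `max_tokens`) below the pooling, EOS, and stop-token checks, so a request whose final sampled token satisfies a stop condition is reported as `FINISHED_STOPPED` (with `stop_reason` set) even when it also hits its length limit on that same step. Below is a minimal standalone sketch of that ordering, not the vLLM code itself; the names `Status`, `SimpleRequest`, and `check_stop_order` are illustrative stand-ins.

```python
# Illustrative sketch only: stand-in types showing why the order of the
# checks matters, not the actual vLLM Request/RequestStatus classes.
from dataclasses import dataclass, field
from enum import Enum, auto


class Status(Enum):
    RUNNING = auto()
    FINISHED_STOPPED = auto()
    FINISHED_LENGTH_CAPPED = auto()


@dataclass
class SimpleRequest:
    output_token_ids: list[int]
    max_tokens: int
    stop_token_ids: set[int] = field(default_factory=set)
    status: Status = Status.RUNNING
    stop_reason: int | None = None


def check_stop_order(req: SimpleRequest) -> bool:
    last = req.output_token_ids[-1]
    # Stop-token match is evaluated first, mirroring the reordered diff.
    if last in req.stop_token_ids:
        req.status = Status.FINISHED_STOPPED
        req.stop_reason = last
        return True
    # The length cap is only reached if no stop condition matched.
    if len(req.output_token_ids) >= req.max_tokens:
        req.status = Status.FINISHED_LENGTH_CAPPED
        return True
    return False


if __name__ == "__main__":
    # A request that exhausts its token budget on a stop token: with the
    # new ordering it finishes as FINISHED_STOPPED with stop_reason set,
    # rather than FINISHED_LENGTH_CAPPED.
    req = SimpleRequest(output_token_ids=[7, 7, 42], max_tokens=3, stop_token_ids={42})
    assert check_stop_order(req)
    print(req.status, req.stop_reason)  # Status.FINISHED_STOPPED 42
```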