From f4d97e4fc276b13e1a4ec18f35239fd48695667d Mon Sep 17 00:00:00 2001 From: bnellnm <49004751+bnellnm@users.noreply.github.com> Date: Wed, 12 Feb 2025 05:39:16 -0500 Subject: [PATCH] [Bug] [V1] Try fetching stop_reason from EngineOutput before checking the request (#13108) --- vllm/v1/engine/output_processor.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/vllm/v1/engine/output_processor.py b/vllm/v1/engine/output_processor.py index 7973c62c381ff..1438f9d5a7b42 100644 --- a/vllm/v1/engine/output_processor.py +++ b/vllm/v1/engine/output_processor.py @@ -2,7 +2,7 @@ import asyncio from dataclasses import dataclass -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union from vllm.outputs import RequestOutput from vllm.sampling_params import RequestOutputKind @@ -164,6 +164,7 @@ class OutputProcessor: new_token_ids = engine_core_output.new_token_ids finish_reason = engine_core_output.finish_reason + stop_reason = engine_core_output.stop_reason # TODO(andy): prompt logprobs + chunked prefill can # result in engine core returning an output for a @@ -181,9 +182,10 @@ class OutputProcessor: # 2) Detokenize the token ids into text and check for stop # strings. - stop_reason = req_state.detokenizer.update(new_token_ids) - if stop_reason: + stop_string = req_state.detokenizer.update(new_token_ids) + if stop_string and finish_reason != FinishReason.STOP: finish_reason = FinishReason.STOP + stop_reason = stop_string # 3) Compute sample and prompt logprobs for request, # if required. @@ -250,7 +252,7 @@ class OutputProcessor: request_state: RequestState, new_token_ids: List[int], finish_reason: Optional[FinishReason], - stop_reason: Optional[str], + stop_reason: Union[int, str, None], ) -> Optional[RequestOutput]: finished = finish_reason is not None