From 015069b01741e9ecb9e604c7fe87fbdfc306ebe5 Mon Sep 17 00:00:00 2001
From: Chauncey <chaunceyjiang@gmail.com>
Date: Thu, 1 May 2025 18:29:01 +0800
Subject: [PATCH] [Misc] Optimize the Qwen3_ReasoningParser extract_reasoning_content (#17515)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/reasoning/qwen3_reasoning_parser.py | 49 ++++++++++++------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/vllm/reasoning/qwen3_reasoning_parser.py b/vllm/reasoning/qwen3_reasoning_parser.py
index f588f40167bcc..7095034b1ca17 100644
--- a/vllm/reasoning/qwen3_reasoning_parser.py
+++ b/vllm/reasoning/qwen3_reasoning_parser.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 
-import re
 from collections.abc import Sequence
 from typing import Optional, Union
 
@@ -31,9 +30,6 @@ class Qwen3ReasoningParser(ReasoningParser):
         self.think_start_token = "<think>"
         self.think_end_token = "</think>"
 
-        self.reasoning_regex = re.compile(
-            rf"{self.think_start_token}(.*?){self.think_end_token}", re.DOTALL)
-
         if not self.model_tokenizer:
             raise ValueError(
                 "The model tokenizer must be passed to the ReasoningParser "
@@ -121,29 +117,34 @@ class Qwen3ReasoningParser(ReasoningParser):
     def extract_reasoning_content(
             self, model_output: str, request: ChatCompletionRequest
     ) -> tuple[Optional[str], Optional[str]]:
+        """
+        Extract reasoning content from the model output.
 
-        # Check if the model output contains the <think> tokens.
+        For text <think>abc</think>xyz:
+        - 'abc' goes to reasoning_content
+        - 'xyz' goes to content
+
+        Returns:
+            tuple[Optional[str], Optional[str]]: reasoning content and content
+        """
+
+        # Check if the model output contains the <think> and </think> tokens.
         if (self.think_start_token not in model_output
                 or self.think_end_token not in model_output):
             return None, model_output
-        else:
-            # Use a regex to find the reasoning content
-            reasoning_content = self.reasoning_regex.findall(model_output)[0]
+        # Check if the <think> is present in the model output, remove it
+        # if it is present.
+        model_output_parts = model_output.partition(self.think_start_token)
+        model_output = model_output_parts[2] if model_output_parts[
+            1] else model_output_parts[0]
+        # Check if the model output contains the </think> tokens.
+        # If the end token is not found, return the model output as is.
+        if self.think_end_token not in model_output:
+            return None, model_output
 
-            # Remove the reasoning content from the model output
-            # Although <think> token is always at the
-            # beginning of the line, we cannot guarantee that the
-            # other models will follow this convention.
-            # Therefore, we need to add :start_index.
-            start_index = model_output.find(self.think_start_token)
-            if start_index != -1:
-                end_index = start_index + len(
-                    f"{self.think_start_token}{reasoning_content}{self.think_end_token}"
-                )
-                model_output = model_output[:start_index] + \
-                    model_output[end_index:]
+        # Extract reasoning content from the model output.
+        reasoning_content, _, content = model_output.partition(
+            self.think_end_token)
 
-            if len(model_output) == 0:
-                return reasoning_content, None
-
-            return reasoning_content, model_output
+        final_content = content or None
+        return reasoning_content, final_content