mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-10 04:05:53 +08:00
[Misc] Optimize the Qwen3_ReasoningParser extract_reasoning_content (#17515)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
parent
fbefc8a78d
commit
015069b017
@ -1,6 +1,5 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
import re
|
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
from typing import Optional, Union
|
from typing import Optional, Union
|
||||||
|
|
||||||
@ -31,9 +30,6 @@ class Qwen3ReasoningParser(ReasoningParser):
|
|||||||
self.think_start_token = "<think>"
|
self.think_start_token = "<think>"
|
||||||
self.think_end_token = "</think>"
|
self.think_end_token = "</think>"
|
||||||
|
|
||||||
self.reasoning_regex = re.compile(
|
|
||||||
rf"{self.think_start_token}(.*?){self.think_end_token}", re.DOTALL)
|
|
||||||
|
|
||||||
if not self.model_tokenizer:
|
if not self.model_tokenizer:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"The model tokenizer must be passed to the ReasoningParser "
|
"The model tokenizer must be passed to the ReasoningParser "
|
||||||
@ -121,29 +117,34 @@ class Qwen3ReasoningParser(ReasoningParser):
|
|||||||
def extract_reasoning_content(
        self, model_output: str, request: ChatCompletionRequest
) -> tuple[Optional[str], Optional[str]]:
    """Extract reasoning content from the model output.

    For text ``<think>abc</think>xyz``:
    - ``abc`` goes to ``reasoning_content``
    - ``xyz`` goes to ``content``

    Args:
        model_output: The full decoded text produced by the model.
        request: The originating chat-completion request (unused here;
            kept for interface compatibility with other parsers).

    Returns:
        tuple[Optional[str], Optional[str]]: ``(reasoning_content,
        content)``; either element is ``None`` when that part is absent.
    """
    # Fast path: unless BOTH markers are present there is no reasoning
    # section to split out — return the output untouched.
    if (self.think_start_token not in model_output
            or self.think_end_token not in model_output):
        return None, model_output

    # Drop everything up to and including <think>. str.partition is a
    # single C-level scan, cheaper than the regex findall this method
    # previously used.
    _, sep, after_start = model_output.partition(self.think_start_token)
    remainder = after_start if sep else model_output

    # </think> may have occurred *before* <think> (e.g. "</think>x<think>y"),
    # in which case the remainder holds no well-formed reasoning section.
    if self.think_end_token not in remainder:
        return None, remainder

    # Split the remainder into the reasoning part and the visible answer.
    reasoning_content, _, content = remainder.partition(
        self.think_end_token)

    # Normalize an empty trailing answer to None for API consistency.
    return reasoning_content, content or None
|||||||
Loading…
x
Reference in New Issue
Block a user