diff --git a/docs/features/reasoning_outputs.md b/docs/features/reasoning_outputs.md
index 28520a80ed36..5f26c7cf182b 100644
--- a/docs/features/reasoning_outputs.md
+++ b/docs/features/reasoning_outputs.md
@@ -2,7 +2,10 @@
vLLM offers support for reasoning models like [DeepSeek R1](https://huggingface.co/deepseek-ai/DeepSeek-R1), which are designed to generate outputs containing both reasoning steps and final conclusions.
-Reasoning models return an additional `reasoning_content` field in their outputs, which contains the reasoning steps that led to the final conclusion. This field is not present in the outputs of other models.
+Reasoning models return an additional `reasoning` field in their outputs, which contains the reasoning steps that led to the final conclusion. This field is not present in the outputs of other models.
+
+!!! warning
+ `reasoning` used to be called `reasoning_content`. For now, `reasoning_content` continues to work, but we encourage you to migrate to `reasoning` because `reasoning_content` may be removed in a future release.
## Supported Models
@@ -61,18 +64,18 @@ Next, make a request to the model that should return the reasoning content in th
# extra_body={"chat_template_kwargs": {"enable_thinking": False}}
response = client.chat.completions.create(model=model, messages=messages)
- reasoning_content = response.choices[0].message.reasoning_content
+ reasoning = response.choices[0].message.reasoning
content = response.choices[0].message.content
- print("reasoning_content:", reasoning_content)
+ print("reasoning:", reasoning)
print("content:", content)
```
-The `reasoning_content` field contains the reasoning steps that led to the final conclusion, while the `content` field contains the final conclusion.
+The `reasoning` field contains the reasoning steps that led to the final conclusion, while the `content` field contains the final conclusion.
## Streaming chat completions
-Streaming chat completions are also supported for reasoning models. The `reasoning_content` field is available in the `delta` field in [chat completion response chunks](https://platform.openai.com/docs/api-reference/chat/streaming).
+Streaming chat completions are also supported for reasoning models. The `reasoning` field is available in the `delta` field in [chat completion response chunks](https://platform.openai.com/docs/api-reference/chat/streaming).
??? console "Json"
@@ -88,7 +91,7 @@ Streaming chat completions are also supported for reasoning models. The `reasoni
"index": 0,
"delta": {
"role": "assistant",
- "reasoning_content": "is",
+ "reasoning": "is",
},
"logprobs": null,
"finish_reason": null
@@ -97,7 +100,7 @@ Streaming chat completions are also supported for reasoning models. The `reasoni
}
```
-OpenAI Python client library does not officially support `reasoning_content` attribute for streaming output. But the client supports extra attributes in the response. You can use `hasattr` to check if the `reasoning_content` attribute is present in the response. For example:
+The OpenAI Python client library does not officially support the `reasoning` attribute for streaming output, but it does support extra attributes in the response. You can use `hasattr` to check whether the `reasoning` attribute is present. For example:
??? code
@@ -127,22 +130,22 @@ OpenAI Python client library does not officially support `reasoning_content` att
)
print("client: Start streaming chat completions...")
- printed_reasoning_content = False
+ printed_reasoning = False
printed_content = False
for chunk in stream:
- # Safely extract reasoning_content and content from delta,
+ # Safely extract reasoning and content from delta,
# defaulting to None if attributes don't exist or are empty strings
- reasoning_content = (
- getattr(chunk.choices[0].delta, "reasoning_content", None) or None
+ reasoning = (
+ getattr(chunk.choices[0].delta, "reasoning", None) or None
)
content = getattr(chunk.choices[0].delta, "content", None) or None
- if reasoning_content is not None:
- if not printed_reasoning_content:
- printed_reasoning_content = True
- print("reasoning_content:", end="", flush=True)
- print(reasoning_content, end="", flush=True)
+ if reasoning is not None:
+ if not printed_reasoning:
+ printed_reasoning = True
+ print("reasoning:", end="", flush=True)
+ print(reasoning, end="", flush=True)
elif content is not None:
if not printed_content:
printed_content = True
@@ -151,11 +154,11 @@ OpenAI Python client library does not officially support `reasoning_content` att
print(content, end="", flush=True)
```
-Remember to check whether the `reasoning_content` exists in the response before accessing it. You could check out the [example](https://github.com/vllm-project/vllm/blob/main/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py).
+Remember to check whether `reasoning` exists in the response before accessing it. See the [example](https://github.com/vllm-project/vllm/blob/main/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py).
## Tool Calling
-The reasoning content is also available when both tool calling and the reasoning parser are enabled. Additionally, tool calling only parses functions from the `content` field, not from the `reasoning_content`.
+The reasoning content is also available when both tool calling and the reasoning parser are enabled. Additionally, tool calling only parses functions from the `content` field, not from the `reasoning` field.
??? code
@@ -192,7 +195,7 @@ The reasoning content is also available when both tool calling and the reasoning
print(response)
tool_call = response.choices[0].message.tool_calls[0].function
- print(f"reasoning_content: {response.choices[0].message.reasoning_content}")
+ print(f"reasoning: {response.choices[0].message.reasoning}")
print(f"Function called: {tool_call.name}")
print(f"Arguments: {tool_call.arguments}")
```
@@ -223,7 +226,7 @@ You can add a new `ReasoningParser` similar to [vllm/reasoning/deepseek_r1_reaso
def __init__(self, tokenizer: AnyTokenizer):
super().__init__(tokenizer)
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -240,7 +243,7 @@ You can add a new `ReasoningParser` similar to [vllm/reasoning/deepseek_r1_reaso
previously been parsed and extracted (see constructor)
"""
- def extract_reasoning_content(
+ def extract_reasoning(
self,
model_output: str,
request: ChatCompletionRequest | ResponsesRequest,
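Given the deprecation note above, client code may need to handle both field names while servers are upgraded. A minimal sketch, assuming a vLLM server at `http://localhost:8000/v1`, a reasoning-capable model, and an illustrative prompt:

```python
# Version-tolerant access: prefer the new `reasoning` field, falling back
# to the deprecated `reasoning_content` when talking to an older server.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
model = client.models.list().data[0].id

response = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": "Which is larger, 9.11 or 9.8?"}],
)
message = response.choices[0].message
reasoning = getattr(message, "reasoning", None) or getattr(
    message, "reasoning_content", None
)
print("reasoning:", reasoning)
print("content:", message.content)
```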
diff --git a/docs/features/structured_outputs.md b/docs/features/structured_outputs.md
index 9e1da37ca962..e38627c70788 100644
--- a/docs/features/structured_outputs.md
+++ b/docs/features/structured_outputs.md
@@ -204,7 +204,7 @@ Note that you can use reasoning with any provided structured outputs feature. Th
}
},
)
- print("reasoning_content: ", completion.choices[0].message.reasoning_content)
+ print("reasoning: ", completion.choices[0].message.reasoning)
print("content: ", completion.choices[0].message.content)
```
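Since only `content` is constrained while the model reasons freely, the final message can be parsed as JSON directly. A sketch under the same assumptions (local vLLM server with a reasoning parser enabled; the schema and prompt are made up), using the standard OpenAI `response_format` shape:

```python
import json

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
model = client.models.list().data[0].id

# Hypothetical schema, used only for illustration.
schema = {
    "type": "object",
    "properties": {"answer": {"type": "integer"}},
    "required": ["answer"],
}
completion = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
    response_format={
        "type": "json_schema",
        "json_schema": {"name": "result", "schema": schema},
    },
)
print("reasoning:", completion.choices[0].message.reasoning)
# `content` was constrained to the schema, so it parses cleanly.
print("parsed:", json.loads(completion.choices[0].message.content))
```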
diff --git a/examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py b/examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py
index 4006d07f73b0..1dfc3084646d 100644
--- a/examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py
+++ b/examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py
@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
This example demonstrates how to use tool calling with reasoning models
-like QwQ-32B. The reasoning_content will not be parsed by the tool
+like QwQ-32B. The reasoning will not be parsed by the tool
calling process; only the final output will be parsed.
To run this example, you need to start the vLLM server with both
@@ -78,7 +78,7 @@ messages = [
def extract_reasoning_and_calls(chunks: list):
- reasoning_content = ""
+ reasoning = ""
tool_call_idx = -1
arguments = []
function_names = []
@@ -97,9 +97,9 @@ def extract_reasoning_and_calls(chunks: list):
if tool_call.function.arguments:
arguments[tool_call_idx] += tool_call.function.arguments
else:
- if hasattr(chunk.choices[0].delta, "reasoning_content"):
- reasoning_content += chunk.choices[0].delta.reasoning_content
- return reasoning_content, arguments, function_names
+ if hasattr(chunk.choices[0].delta, "reasoning"):
+ reasoning += chunk.choices[0].delta.reasoning
+ return reasoning, arguments, function_names
def main():
@@ -115,7 +115,7 @@ def main():
tool_calls = client.chat.completions.create(
messages=messages, model=model, tools=tools
)
- print(f"reasoning_content: {tool_calls.choices[0].message.reasoning_content}")
+ print(f"reasoning: {tool_calls.choices[0].message.reasoning}")
print(f"function name: {tool_calls.choices[0].message.tool_calls[0].function.name}")
print(
f"function arguments: "
@@ -129,9 +129,9 @@ def main():
chunks = list(tool_calls_stream)
- reasoning_content, arguments, function_names = extract_reasoning_and_calls(chunks)
+ reasoning, arguments, function_names = extract_reasoning_and_calls(chunks)
- print(f"reasoning_content: {reasoning_content}")
+ print(f"reasoning: {reasoning}")
print(f"function name: {function_names[0]}")
print(f"function arguments: {arguments[0]}")
@@ -144,7 +144,7 @@ def main():
)
tool_call = tool_calls.choices[0].message.tool_calls[0].function
- print(f"reasoning_content: {tool_calls.choices[0].message.reasoning_content}")
+ print(f"reasoning: {tool_calls.choices[0].message.reasoning}")
print(f"function name: {tool_call.name}")
print(f"function arguments: {tool_call.arguments}")
print("----------Stream Generate With Named Function Calling--------------")
@@ -159,8 +159,8 @@ def main():
chunks = list(tool_calls_stream)
- reasoning_content, arguments, function_names = extract_reasoning_and_calls(chunks)
- print(f"reasoning_content: {reasoning_content}")
+ reasoning, arguments, function_names = extract_reasoning_and_calls(chunks)
+ print(f"reasoning: {reasoning}")
print(f"function name: {function_names[0]}")
print(f"function arguments: {arguments[0]}")
print("\n\n")
diff --git a/examples/online_serving/openai_chat_completion_with_reasoning.py b/examples/online_serving/openai_chat_completion_with_reasoning.py
index 932dbeb2e7a2..87043897b058 100644
--- a/examples/online_serving/openai_chat_completion_with_reasoning.py
+++ b/examples/online_serving/openai_chat_completion_with_reasoning.py
@@ -38,10 +38,10 @@ def main():
# For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
response = client.chat.completions.create(model=model, messages=messages)
- reasoning_content = response.choices[0].message.reasoning_content
+ reasoning = response.choices[0].message.reasoning
content = response.choices[0].message.content
- print("reasoning_content for Round 1:", reasoning_content)
+ print("reasoning for Round 1:", reasoning)
print("content for Round 1:", content)
# Round 2
@@ -54,10 +54,10 @@ def main():
)
response = client.chat.completions.create(model=model, messages=messages)
- reasoning_content = response.choices[0].message.reasoning_content
+ reasoning = response.choices[0].message.reasoning
content = response.choices[0].message.content
- print("reasoning_content for Round 2:", reasoning_content)
+ print("reasoning for Round 2:", reasoning)
print("content for Round 2:", content)
diff --git a/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py b/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
index 7d1ea3771459..8e262701b720 100644
--- a/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
+++ b/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
@@ -20,7 +20,7 @@ in real-time as they are generated by the model. This is useful for scenarios
where you want to display chat completions to the user as they are generated
by the model.
-Remember to check content and reasoning_content exist in `ChatCompletionChunk`,
+Remember to check that `content` and `reasoning` exist in `ChatCompletionChunk`;
content may not exist leading to errors if you try to access it.
"""
@@ -47,22 +47,20 @@ def main():
stream = client.chat.completions.create(model=model, messages=messages, stream=True)
print("client: Start streaming chat completions...")
- printed_reasoning_content = False
+ printed_reasoning = False
printed_content = False
for chunk in stream:
- # Safely extract reasoning_content and content from delta,
+ # Safely extract reasoning and content from delta,
# defaulting to None if attributes don't exist or are empty strings
- reasoning_content = (
- getattr(chunk.choices[0].delta, "reasoning_content", None) or None
- )
+ reasoning = getattr(chunk.choices[0].delta, "reasoning", None) or None
content = getattr(chunk.choices[0].delta, "content", None) or None
- if reasoning_content is not None:
- if not printed_reasoning_content:
- printed_reasoning_content = True
- print("reasoning_content:", end="", flush=True)
- print(reasoning_content, end="", flush=True)
+ if reasoning is not None:
+ if not printed_reasoning:
+ printed_reasoning = True
+ print("reasoning:", end="", flush=True)
+ print(reasoning, end="", flush=True)
elif content is not None:
if not printed_content:
printed_content = True
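The same fallback idea applies to streaming. A hedged sketch (server URL, model, and prompt are assumptions) that tolerates both field names while printing tokens as they arrive:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
model = client.models.list().data[0].id

stream = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta
    # Prefer `reasoning`; fall back to the deprecated `reasoning_content`.
    text = getattr(delta, "reasoning", None) or getattr(
        delta, "reasoning_content", None
    )
    if text:
        print(text, end="", flush=True)
    elif getattr(delta, "content", None):
        print(delta.content, end="", flush=True)
print()
```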
diff --git a/examples/online_serving/streamlit_openai_chatbot_webserver.py b/examples/online_serving/streamlit_openai_chatbot_webserver.py
index 64c8a9178280..d60dbf4d753a 100644
--- a/examples/online_serving/streamlit_openai_chatbot_webserver.py
+++ b/examples/online_serving/streamlit_openai_chatbot_webserver.py
@@ -159,8 +159,8 @@ def get_llm_response(messages, model, reason, content_ph=None, reasoning_ph=None
for chunk in response:
delta = chunk.choices[0].delta
# Stream reasoning first
- if reason and hasattr(delta, "reasoning_content") and live_think:
- rc = delta.reasoning_content
+ if reason and hasattr(delta, "reasoning") and live_think:
+ rc = delta.reasoning
if rc:
think_text += rc
live_think.markdown(think_text + "▌")
@@ -262,8 +262,8 @@ def server_supports_reasoning():
messages=[{"role": "user", "content": "Hi"}],
stream=False,
)
- return hasattr(resp.choices[0].message, "reasoning_content") and bool(
- resp.choices[0].message.reasoning_content
+ return hasattr(resp.choices[0].message, "reasoning") and bool(
+ resp.choices[0].message.reasoning
)
diff --git a/examples/online_serving/structured_outputs/structured_outputs.py b/examples/online_serving/structured_outputs/structured_outputs.py
index 02853a95469a..ff473d044e32 100644
--- a/examples/online_serving/structured_outputs/structured_outputs.py
+++ b/examples/online_serving/structured_outputs/structured_outputs.py
@@ -33,7 +33,7 @@ async def print_stream_response(
async for chunk in stream_response:
delta = chunk.choices[0].delta
- reasoning_chunk_text: str | None = getattr(delta, "reasoning_content", None)
+ reasoning_chunk_text: str | None = getattr(delta, "reasoning", None)
content_chunk_text = delta.content
if args.reasoning:
@@ -255,8 +255,8 @@ async def cli():
for constraint, response in zip(constraints, results):
print(f"\n\n{constraint}:")
message = response.choices[0].message
- if args.reasoning and hasattr(message, "reasoning_content"):
- print(f" Reasoning: {message.reasoning_content or ''}")
+ if args.reasoning and hasattr(message, "reasoning"):
+ print(f" Reasoning: {message.reasoning or ''}")
print(f" Content: {message.content!r}")
diff --git a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py b/tests/entrypoints/openai/test_chat_with_tool_reasoning.py
index e452b578ba22..7b3092b56303 100644
--- a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py
+++ b/tests/entrypoints/openai/test_chat_with_tool_reasoning.py
@@ -80,7 +80,7 @@ FUNC_ARGS = """{"city": "Dallas", "state": "TX", "unit": "fahrenheit"}"""
def extract_reasoning_and_calls(chunks: list):
- reasoning_content = ""
+ reasoning = ""
tool_call_idx = -1
arguments = []
function_names = []
@@ -99,9 +99,9 @@ def extract_reasoning_and_calls(chunks: list):
if tool_call.function.arguments:
arguments[tool_call_idx] += tool_call.function.arguments
else:
- if hasattr(chunk.choices[0].delta, "reasoning_content"):
- reasoning_content += chunk.choices[0].delta.reasoning_content
- return reasoning_content, arguments, function_names
+ if hasattr(chunk.choices[0].delta, "reasoning"):
+ reasoning += chunk.choices[0].delta.reasoning
+ return reasoning, arguments, function_names
# test streaming
@@ -119,8 +119,8 @@ async def test_chat_streaming_of_tool_and_reasoning(client: openai.AsyncOpenAI):
async for chunk in stream:
chunks.append(chunk)
- reasoning_content, arguments, function_names = extract_reasoning_and_calls(chunks)
- assert len(reasoning_content) > 0
+ reasoning, arguments, function_names = extract_reasoning_and_calls(chunks)
+ assert len(reasoning) > 0
assert len(function_names) > 0 and function_names[0] == FUNC_NAME
assert len(arguments) > 0 and arguments[0] == FUNC_ARGS
@@ -136,6 +136,6 @@ async def test_chat_full_of_tool_and_reasoning(client: openai.AsyncOpenAI):
stream=False,
)
- assert len(tool_calls.choices[0].message.reasoning_content) > 0
+ assert len(tool_calls.choices[0].message.reasoning) > 0
assert tool_calls.choices[0].message.tool_calls[0].function.name == FUNC_NAME
assert tool_calls.choices[0].message.tool_calls[0].function.arguments == FUNC_ARGS
diff --git a/tests/entrypoints/openai/test_completion_with_function_calling.py b/tests/entrypoints/openai/test_completion_with_function_calling.py
index 6d8db361a57d..53369f074eca 100644
--- a/tests/entrypoints/openai/test_completion_with_function_calling.py
+++ b/tests/entrypoints/openai/test_completion_with_function_calling.py
@@ -180,8 +180,8 @@ async def test_function_tool_use(
extra_body={"chat_template_kwargs": {"enable_thinking": enable_thinking}},
)
if enable_thinking:
- assert chat_completion.choices[0].message.reasoning_content is not None
- assert chat_completion.choices[0].message.reasoning_content != ""
+ assert chat_completion.choices[0].message.reasoning is not None
+ assert chat_completion.choices[0].message.reasoning != ""
assert chat_completion.choices[0].message.tool_calls is not None
assert len(chat_completion.choices[0].message.tool_calls) > 0
else:
@@ -200,9 +200,9 @@ async def test_function_tool_use(
async for chunk in output_stream:
if chunk.choices:
if enable_thinking and getattr(
- chunk.choices[0].delta, "reasoning_content", None
+ chunk.choices[0].delta, "reasoning", None
):
- reasoning.append(chunk.choices[0].delta.reasoning_content)
+ reasoning.append(chunk.choices[0].delta.reasoning)
if chunk.choices[0].delta.tool_calls:
output.extend(chunk.choices[0].delta.tool_calls)
diff --git a/tests/entrypoints/openai/test_run_batch.py b/tests/entrypoints/openai/test_run_batch.py
index 2f678a0535cc..f951b57fe726 100644
--- a/tests/entrypoints/openai/test_run_batch.py
+++ b/tests/entrypoints/openai/test_run_batch.py
@@ -232,9 +232,9 @@ def test_reasoning_parser():
assert isinstance(line_dict, dict)
assert line_dict["error"] is None
- # Check that reasoning_content is present and not empty
- reasoning_content = line_dict["response"]["body"]["choices"][0]["message"][
- "reasoning_content"
+ # Check that reasoning is present and not empty
+ reasoning = line_dict["response"]["body"]["choices"][0]["message"][
+ "reasoning"
]
- assert reasoning_content is not None
- assert len(reasoning_content) > 0
+ assert reasoning is not None
+ assert len(reasoning) > 0
diff --git a/tests/reasoning/test_base_thinking_reasoning_parser.py b/tests/reasoning/test_base_thinking_reasoning_parser.py
index ddda50fe770a..d31b1c7d169b 100644
--- a/tests/reasoning/test_base_thinking_reasoning_parser.py
+++ b/tests/reasoning/test_base_thinking_reasoning_parser.py
@@ -151,57 +151,57 @@ class TestBaseThinkingReasoningParserMethods:
class TestBaseThinkingReasoningParserExtraction:
"""Test reasoning content extraction methods."""
- def test_extract_reasoning_content_with_both_tokens(self, test_tokenizer):
+ def test_extract_reasoning_with_both_tokens(self, test_tokenizer):
"""Test extraction when both start and end tokens are present."""
parser = TestThinkingReasoningParser(test_tokenizer)
request = ChatCompletionRequest(messages=[], model="test-model")
model_output = "This is reasoningThis is content"
- reasoning, content = parser.extract_reasoning_content(model_output, request)
+ reasoning, content = parser.extract_reasoning(model_output, request)
assert reasoning == "This is reasoning"
assert content == "This is content"
- def test_extract_reasoning_content_only_end_token(self, test_tokenizer):
+ def test_extract_reasoning_only_end_token(self, test_tokenizer):
"""Test extraction when only end token is present."""
parser = TestThinkingReasoningParser(test_tokenizer)
request = ChatCompletionRequest(messages=[], model="test-model")
model_output = "This is reasoningThis is content"
- reasoning, content = parser.extract_reasoning_content(model_output, request)
+ reasoning, content = parser.extract_reasoning(model_output, request)
assert reasoning == "This is reasoning"
assert content == "This is content"
- def test_extract_reasoning_content_no_end_token(self, test_tokenizer):
+ def test_extract_reasoning_no_end_token(self, test_tokenizer):
"""Test extraction when no end token is present."""
parser = TestThinkingReasoningParser(test_tokenizer)
request = ChatCompletionRequest(messages=[], model="test-model")
model_output = "This is just content"
- reasoning, content = parser.extract_reasoning_content(model_output, request)
+ reasoning, content = parser.extract_reasoning(model_output, request)
assert reasoning == "This is just content"
assert content is None
- def test_extract_reasoning_content_empty_output(self, test_tokenizer):
+ def test_extract_reasoning_empty_output(self, test_tokenizer):
"""Test extraction with empty output."""
parser = TestThinkingReasoningParser(test_tokenizer)
request = ChatCompletionRequest(messages=[], model="test-model")
model_output = ""
- reasoning, content = parser.extract_reasoning_content(model_output, request)
+ reasoning, content = parser.extract_reasoning(model_output, request)
assert reasoning == ""
assert content is None
- def test_extract_reasoning_content_only_tokens(self, test_tokenizer):
+ def test_extract_reasoning_only_tokens(self, test_tokenizer):
"""Test extraction with only tokens and no content."""
parser = TestThinkingReasoningParser(test_tokenizer)
request = ChatCompletionRequest(messages=[], model="test-model")
model_output = ""
- reasoning, content = parser.extract_reasoning_content(model_output, request)
+ reasoning, content = parser.extract_reasoning(model_output, request)
assert reasoning == ""
assert content is None
diff --git a/tests/reasoning/test_deepseekr1_reasoning_parser.py b/tests/reasoning/test_deepseekr1_reasoning_parser.py
index 946d01c123c5..91f0c93653d3 100644
--- a/tests/reasoning/test_deepseekr1_reasoning_parser.py
+++ b/tests/reasoning/test_deepseekr1_reasoning_parser.py
@@ -21,97 +21,97 @@ def deepseek_r1_qwen_tokenizer():
SIMPLE_REASONING = {
"output": "This is a reasoning sectionThis is the rest",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
COMPLETE_REASONING = {
"output": "This is a reasoning section",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": True,
}
NO_CONTENT = {
"output": "This is content",
- "reasoning_content": "This is content",
+ "reasoning": "This is content",
"content": None,
"is_reasoning_end": False,
}
NO_REASONING_STREAMING = {
"output": "This is a reasoning section",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": False,
}
MULTIPLE_LINES = {
"output": "This\nThatThis is the rest\nThat",
- "reasoning_content": "This\nThat",
+ "reasoning": "This\nThat",
"content": "This is the rest\nThat",
"is_reasoning_end": True,
}
SHORTEST_REASONING_NO_STREAMING = {
"output": "This is the rest",
- "reasoning_content": "",
+ "reasoning": "",
"content": "This is the rest",
"is_reasoning_end": True,
}
SHORTEST_REASONING = {
"output": "This is the rest",
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is the rest",
"is_reasoning_end": True,
}
REASONING_WITH_THINK = {
"output": "This is a reasoning sectionThis is the rest",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
COMPLETE_REASONING_WITH_THINK = {
"output": "This is a reasoning section",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": True,
}
MULTIPLE_LINES_WITH_THINK = {
"output": "This\nThatThis is the rest\nThat",
- "reasoning_content": "This\nThat",
+ "reasoning": "This\nThat",
"content": "This is the rest\nThat",
"is_reasoning_end": True,
}
SHORTEST_REASONING_NO_STREAMING_WITH_THINK = {
"output": "This is the rest",
- "reasoning_content": "",
+ "reasoning": "",
"content": "This is the rest",
"is_reasoning_end": True,
}
SHORTEST_REASONING_WITH_THINK = {
"output": "This is the rest",
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is the rest",
"is_reasoning_end": True,
}
THINK_NO_END = {
"output": "This is a reasoning section",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": False,
}
EMPTY = {
"output": "",
- "reasoning_content": "",
+ "reasoning": "",
"content": None,
"is_reasoning_end": False,
}
EMPTY_STREAMING = {
"output": "",
- "reasoning_content": None,
+ "reasoning": None,
"content": None,
"is_reasoning_end": False,
}
NEW_LINE = {
"output": "\nThis is a reasoning section\nThis is the rest",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "\nThis is the rest",
"is_reasoning_end": True,
}
@@ -121,7 +121,7 @@ NEW_LINE = {
# or not.
NEW_LINE_STREAMING = {
"output": "\nThis is a reasoning section\nThis is the rest",
- "reasoning_content": "\nThis is a reasoning section",
+ "reasoning": "\nThis is a reasoning section",
"content": "\nThis is the rest",
"is_reasoning_end": True,
}
@@ -269,7 +269,7 @@ def test_reasoning(
parser, output_tokens, streaming=streaming
)
- assert reasoning == param_dict["reasoning_content"]
+ assert reasoning == param_dict["reasoning"]
assert content == param_dict["content"]
# Test is_reasoning_end
diff --git a/tests/reasoning/test_deepseekv3_reasoning_parser.py b/tests/reasoning/test_deepseekv3_reasoning_parser.py
index e1ff7462b1fa..6e8f0e8dcc9b 100644
--- a/tests/reasoning/test_deepseekv3_reasoning_parser.py
+++ b/tests/reasoning/test_deepseekv3_reasoning_parser.py
@@ -44,14 +44,14 @@ def test_identity_reasoning_parser_basic(tokenizer):
# Test extract_content_ids returns all input_ids
assert parser.extract_content_ids(input_ids) == input_ids
- # Test extract_reasoning_content returns (None, model_output)
+ # Test extract_reasoning returns (None, model_output)
request = ChatCompletionRequest(model="test-model", messages=[], temperature=1.0)
- reasoning, content = parser.extract_reasoning_content(input_text, request)
+ reasoning, content = parser.extract_reasoning(input_text, request)
assert reasoning is None
assert content == input_text
- # Test extract_reasoning_content_streaming returns DeltaMessage or None
- result = parser.extract_reasoning_content_streaming(
+ # Test extract_reasoning_streaming returns DeltaMessage or None
+ result = parser.extract_reasoning_streaming(
previous_text="",
current_text="Hello world",
delta_text="Hello world",
@@ -63,7 +63,7 @@ def test_identity_reasoning_parser_basic(tokenizer):
assert result.content == "Hello world"
# If delta_text is empty, should return None
- result_none = parser.extract_reasoning_content_streaming(
+ result_none = parser.extract_reasoning_streaming(
previous_text="Hello world",
current_text="Hello world",
delta_text="",
diff --git a/tests/reasoning/test_ernie45_reasoning_parser.py b/tests/reasoning/test_ernie45_reasoning_parser.py
index 344478013e6b..dbf5507ae68b 100644
--- a/tests/reasoning/test_ernie45_reasoning_parser.py
+++ b/tests/reasoning/test_ernie45_reasoning_parser.py
@@ -20,36 +20,36 @@ def ernie45_tokenizer():
# with </think>, non-stream
WITH_THINK = {
"output": "abcdef",
- "reasoning_content": "abc",
+ "reasoning": "abc",
"content": "def",
}
# with </think>, stream
WITH_THINK_STREAM = {
"output": "abcdef",
- "reasoning_content": "abc",
+ "reasoning": "abc",
"content": "def",
}
-# without </think>, all is reasoning_content
+# without </think>, all is reasoning
WITHOUT_THINK = {
"output": "abc",
- "reasoning_content": "abc",
+ "reasoning": "abc",
"content": None,
}
-# without </think>, all is reasoning_content
+# without </think>, all is reasoning
WITHOUT_THINK_STREAM = {
"output": "abc",
- "reasoning_content": "abc",
+ "reasoning": "abc",
"content": None,
}
COMPLETE_REASONING = {
"output": "abc",
- "reasoning_content": "abc",
+ "reasoning": "abc",
"content": None,
}
MULTILINE_REASONING = {
"output": "abc\nABCdef\nDEF",
- "reasoning_content": "abc\nABC",
+ "reasoning": "abc\nABC",
"content": "def\nDEF",
}
@@ -120,5 +120,5 @@ def test_reasoning(
print()
- assert reasoning == param_dict["reasoning_content"]
+ assert reasoning == param_dict["reasoning"]
assert content == param_dict["content"]
diff --git a/tests/reasoning/test_glm4_moe_reasoning_parser.py b/tests/reasoning/test_glm4_moe_reasoning_parser.py
index 0a8595a00fcb..6f7827e5b827 100644
--- a/tests/reasoning/test_glm4_moe_reasoning_parser.py
+++ b/tests/reasoning/test_glm4_moe_reasoning_parser.py
@@ -21,54 +21,54 @@ def glm45_tokenizer():
WITH_THINK = {
"output": "This is a reasoning sectionThis is the rest",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
WITH_THINK_STREAM = {
"output": "This is a reasoning sectionThis is the rest",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
WITHOUT_THINK = {
"output": "This is the rest",
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is the rest",
"is_reasoning_end": False,
}
WITHOUT_THINK_STREAM = {
"output": "This is the rest",
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is the rest",
"is_reasoning_end": False,
}
COMPLETE_REASONING = {
"output": "This is a reasoning section",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": True,
}
MULTILINE_REASONING = {
"output": "This is a reasoning\nsectionThis is the rest\nThat",
- "reasoning_content": "This is a reasoning\nsection",
+ "reasoning": "This is a reasoning\nsection",
"content": "This is the rest\nThat",
"is_reasoning_end": True,
}
ONLY_OPEN_TAG = {
"output": "This is a reasoning section",
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is a reasoning section",
"is_reasoning_end": False,
}
ONLY_OPEN_TAG_STREAM = {
"output": "This is a reasoning section",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": False,
}
@@ -184,7 +184,7 @@ def test_reasoning(
parser, output_tokens, streaming=streaming
)
- assert reasoning == param_dict["reasoning_content"]
+ assert reasoning == param_dict["reasoning"]
assert content == param_dict["content"]
output_ids = glm45_tokenizer.convert_tokens_to_ids(output)
diff --git a/tests/reasoning/test_granite_reasoning_parser.py b/tests/reasoning/test_granite_reasoning_parser.py
index de1663408d72..14aad3ad0818 100644
--- a/tests/reasoning/test_granite_reasoning_parser.py
+++ b/tests/reasoning/test_granite_reasoning_parser.py
@@ -12,37 +12,37 @@ START_RESPONSE = "Here is my response:"
SIMPLE_REASONING = {
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest", # noqa: E501
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
}
COMPLETE_REASONING = {
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
}
NO_REASONING = {
"output": "This is content",
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is content",
}
MULTIPLE_LINES = {
"output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
- "reasoning_content": "This\nThat",
+ "reasoning": "This\nThat",
"content": "This is the rest\nThat",
}
REASONING_WITH_THINK = {
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest", # noqa: E501
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
}
COMPLETE_REASONING_WITH_THINK = {
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
}
MULTIPLE_LINES_WITH_THINK = {
"output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
- "reasoning_content": "This\nThat",
+ "reasoning": "This\nThat",
"content": "This is the rest\nThat",
}
@@ -141,7 +141,7 @@ def test_reasoning(
parser, output_tokens, streaming=streaming
)
- assert reasoning == param_dict["reasoning_content"]
+ assert reasoning == param_dict["reasoning"]
assert content == param_dict["content"]
@@ -155,7 +155,7 @@ STREAMING_1 = {
"previous_text": None,
"current_text": "Here",
"delta_text": "Here",
- "reasoning_content": None,
+ "reasoning": None,
"content": None,
}
# When we fail, we should give what was previously being silenced first
@@ -163,7 +163,7 @@ STREAMING_2 = {
"previous_text": "Here is my thought",
"current_text": "Here is my thought failure",
"delta_text": " failure",
- "reasoning_content": None,
+ "reasoning": None,
"content": "Here is my thought failure",
}
# But then after the first one, we should only add the delta text to content
@@ -171,7 +171,7 @@ STREAMING_3 = {
"previous_text": "Here wrong",
"current_text": " words",
"delta_text": " Here wrong words",
- "reasoning_content": None,
+ "reasoning": None,
"content": " words",
}
# But then after the first one, we should only add the delta text to content
@@ -179,7 +179,7 @@ STREAMING_4 = {
"previous_text": "Here is my thought",
"current_text": "Here is my thought process:",
"delta_text": " process:",
- "reasoning_content": None,
+ "reasoning": None,
"content": None,
}
# Reasoning started successfully; parse reasoning content
@@ -187,7 +187,7 @@ STREAMING_5 = {
"previous_text": "Here is my thought process:",
"current_text": "Here is my thought process: foo",
"delta_text": " foo",
- "reasoning_content": " foo",
+ "reasoning": " foo",
"content": None,
}
# Response special sequence has started, but not finished.
@@ -195,7 +195,7 @@ STREAMING_6 = {
"previous_text": "Here is my thought process: foo",
"current_text": "Here is my thought process: foo Here is",
"delta_text": " Here is",
- "reasoning_content": " ",
+ "reasoning": " ",
"content": None,
}
# Response special sequence started, but was broken; the reasoning
@@ -204,7 +204,7 @@ STREAMING_7 = {
"previous_text": "Here is my thought process: foo Here is",
"current_text": "Here is my thought process: foo Here is Here",
"delta_text": " Here",
- "reasoning_content": "Here is ",
+ "reasoning": "Here is ",
"content": None,
}
# Response special sequence is ongoing
@@ -212,7 +212,7 @@ STREAMING_8 = {
"previous_text": "Here is my thought process: foo Here is my response:",
"current_text": "Here is my thought process: foo Here is my response: bar",
"delta_text": " bar",
- "reasoning_content": None,
+ "reasoning": None,
"content": " bar",
}
# The delta text has everything; we should be able to correctly parse both
@@ -220,7 +220,7 @@ STREAMING_9 = {
"previous_text": None,
"current_text": "Here is my thought process: foo Here is my response: bar",
"delta_text": "Here is my thought process: foo Here is my response: bar",
- "reasoning_content": " foo ",
+ "reasoning": " foo ",
"content": " bar",
}
## The Response is ongoing, and the delta mixes reasoning content / content
@@ -228,7 +228,7 @@ STREAMING_10 = {
"previous_text": "Here is my thought process: foo",
"current_text": "Here is my thought process: foo bar Here is my response: baz",
"delta_text": " bar Here is my response: baz",
- "reasoning_content": " bar ",
+ "reasoning": " bar ",
"content": " baz",
}
# The delta text starts a new substring that might be a response special seq
@@ -236,7 +236,7 @@ STREAMING_11 = {
"previous_text": "Here is my thought process: This is a reasoning section ",
"current_text": "Here is my thought process: This is a reasoning section Here",
"delta_text": "Here",
- "reasoning_content": None,
+ "reasoning": None,
"content": None,
}
# The delta text is finishing the response special seq
@@ -244,14 +244,14 @@ STREAMING_12 = {
"previous_text": "Here is my thought process: foo Here is my response",
"current_text": "Here is my thought process: foo Here is my response:",
"delta_text": ":",
- "reasoning_content": None,
+ "reasoning": None,
"content": None,
}
STREAMING_13 = {
"previous_text": "Here is my thought process: foo Here",
"current_text": "Here is my thought process: foo Here was",
"delta_text": " was",
- "reasoning_content": "Here was",
+ "reasoning": "Here was",
"content": None,
}
@@ -326,7 +326,7 @@ def test_streaming_subcases(param_dict):
tokenizer
)
- response = parser.extract_reasoning_content_streaming(
+ response = parser.extract_reasoning_streaming(
previous_text=param_dict["previous_text"],
current_text=param_dict["current_text"],
delta_text=param_dict["delta_text"],
@@ -336,9 +336,9 @@ def test_streaming_subcases(param_dict):
)
# Streaming currently expects at least one of reasoning content / content,
# so the response should return None in that case.
- if param_dict["reasoning_content"] is None and param_dict["content"] is None:
+ if param_dict["reasoning"] is None and param_dict["content"] is None:
assert response is None
else:
assert isinstance(response, DeltaMessage)
- assert param_dict["reasoning_content"] == response.reasoning_content
+ assert param_dict["reasoning"] == response.reasoning
assert param_dict["content"] == response.content
diff --git a/tests/reasoning/test_hunyuan_reasoning_parser.py b/tests/reasoning/test_hunyuan_reasoning_parser.py
index b7e3ea73ccde..32e753d2abb7 100644
--- a/tests/reasoning/test_hunyuan_reasoning_parser.py
+++ b/tests/reasoning/test_hunyuan_reasoning_parser.py
@@ -14,49 +14,49 @@ END_RESPONSE = "\n"
NO_REASONING_QUICK_THROUGHT = {
"output": f"{START_REASONING}{START_RESPONSE}This is the rest{END_RESPONSE}", # noqa: E501
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is the rest",
}
SIMPLE_REASONING = {
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest{END_RESPONSE}", # noqa: E501
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
}
COMPLETE_REASONING = {
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
}
COMPLETE_REASONING_WITH_SYMBOL = {
"output": f"{START_REASONING}This is a reasoning section!{START_RESPONSE}",
- "reasoning_content": "This is a reasoning section!",
+ "reasoning": "This is a reasoning section!",
"content": None,
}
NO_REASONING = {
"output": "This is content",
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is content",
}
MULTIPLE_LINES = {
"output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
- "reasoning_content": "This\nThat",
+ "reasoning": "This\nThat",
"content": "This is the rest\nThat",
}
REASONING_WITH_THINK = {
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest", # noqa: E501
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
}
COMPLETE_REASONING_WITH_THINK = {
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
}
MULTIPLE_LINES_WITH_THINK = {
"output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
- "reasoning_content": "This\nThat",
+ "reasoning": "This\nThat",
"content": "This is the rest\nThat",
}
@@ -164,5 +164,5 @@ def test_reasoning(
parser, output_tokens, streaming=streaming
)
- assert reasoning == param_dict["reasoning_content"]
+ assert reasoning == param_dict["reasoning"]
assert content == param_dict["content"]
diff --git a/tests/reasoning/test_mistral_reasoning_parser.py b/tests/reasoning/test_mistral_reasoning_parser.py
index ff7f94b40ee1..5163c863863a 100644
--- a/tests/reasoning/test_mistral_reasoning_parser.py
+++ b/tests/reasoning/test_mistral_reasoning_parser.py
@@ -20,97 +20,97 @@ def mistral_tokenizer():
SIMPLE_REASONING = {
"output": "This is a reasoning section[/THINK]This is the rest",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
COMPLETE_REASONING = {
"output": "This is a reasoning section[/THINK]",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": True,
}
NO_CONTENT = {
"output": "This is content",
- "reasoning_content": "This is content",
+ "reasoning": "This is content",
"content": None,
"is_reasoning_end": False,
}
NO_REASONING_STREAMING = {
"output": "This is a reasoning section",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": False,
}
MULTIPLE_LINES = {
"output": "This\nThat[/THINK]This is the rest\nThat",
- "reasoning_content": "This\nThat",
+ "reasoning": "This\nThat",
"content": "This is the rest\nThat",
"is_reasoning_end": True,
}
SHORTEST_REASONING_NO_STREAMING = {
"output": "[/THINK]This is the rest",
- "reasoning_content": "",
+ "reasoning": "",
"content": "This is the rest",
"is_reasoning_end": True,
}
SHORTEST_REASONING = {
"output": "[/THINK]This is the rest",
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is the rest",
"is_reasoning_end": True,
}
REASONING_WITH_THINK = {
"output": "[THINK]This is a reasoning section[/THINK]This is the rest",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
COMPLETE_REASONING_WITH_THINK = {
"output": "[THINK]This is a reasoning section[/THINK]",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": True,
}
MULTIPLE_LINES_WITH_THINK = {
"output": "[THINK]This\nThat[/THINK]This is the rest\nThat",
- "reasoning_content": "This\nThat",
+ "reasoning": "This\nThat",
"content": "This is the rest\nThat",
"is_reasoning_end": True,
}
SHORTEST_REASONING_NO_STREAMING_WITH_THINK = {
"output": "[/THINK]This is the rest",
- "reasoning_content": "",
+ "reasoning": "",
"content": "This is the rest",
"is_reasoning_end": True,
}
SHORTEST_REASONING_WITH_THINK = {
"output": "[/THINK]This is the rest",
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is the rest",
"is_reasoning_end": True,
}
THINK_NO_END = {
"output": "[THINK]This is a reasoning section",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": False,
}
EMPTY = {
"output": "",
- "reasoning_content": "",
+ "reasoning": "",
"content": None,
"is_reasoning_end": False,
}
EMPTY_STREAMING = {
"output": "",
- "reasoning_content": None,
+ "reasoning": None,
"content": None,
"is_reasoning_end": False,
}
NEW_LINE = {
"output": "\n[THINK]This is a reasoning section[/THINK]\nThis is the rest",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "\nThis is the rest",
"is_reasoning_end": True,
}
@@ -120,7 +120,7 @@ NEW_LINE = {
# or not.
NEW_LINE_STREAMING = {
"output": "\n[THINK]This is a reasoning section[/THINK]\nThis is the rest",
- "reasoning_content": "\nThis is a reasoning section",
+ "reasoning": "\nThis is a reasoning section",
"content": "\nThis is the rest",
"is_reasoning_end": True,
}
@@ -307,7 +307,7 @@ def test_mistral_reasoning(
parser, output_tokens, streaming=streaming
)
- assert reasoning == param_dict["reasoning_content"]
+ assert reasoning == param_dict["reasoning"]
assert content == param_dict["content"]
# Test is_reasoning_end
diff --git a/tests/reasoning/test_olmo3_reasoning_parser.py b/tests/reasoning/test_olmo3_reasoning_parser.py
index 4a2eca994610..bc0e72e2a456 100644
--- a/tests/reasoning/test_olmo3_reasoning_parser.py
+++ b/tests/reasoning/test_olmo3_reasoning_parser.py
@@ -13,43 +13,43 @@ END_REASONING = "</think>"
NO_REASONING = {
"output": f"{START_REASONING}{END_REASONING}No thoughts, head empty!",
- "reasoning_content": None,
+ "reasoning": None,
"content": "No thoughts, head empty!",
}
NO_REASONING_WITH_NEWLINE = {
"output": f"{START_REASONING}\n{END_REASONING}\n\nNo thoughts, head empty!",
- "reasoning_content": "\n",
+ "reasoning": "\n",
"content": "\n\nNo thoughts, head empty!",
}
SIMPLE_REASONING = {
"output": f"{START_REASONING}This is a reasoning section{END_REASONING}This is the rest", # noqa: E501
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
}
SIMPLE_REASONING_WITH_NEWLINE = {
"output": f"{START_REASONING} Look!\n\nI'm thinking...{END_REASONING}\nThis is the rest", # noqa: E501
- "reasoning_content": " Look!\n\nI'm thinking...",
+ "reasoning": " Look!\n\nI'm thinking...",
"content": "\nThis is the rest",
}
SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES = {
"output": f"{START_REASONING}\nLook!\nI'm thinking...\n\n{END_REASONING}\n\n\nThis is the rest", # noqa: E501
- "reasoning_content": "\nLook!\nI'm thinking...\n\n",
+ "reasoning": "\nLook!\nI'm thinking...\n\n",
"content": "\n\n\nThis is the rest",
}
NO_REASONING_ONLY_END_THINK = {
"output": f"{END_REASONING}\n\nNo thoughts, head empty!",
- "reasoning_content": None,
+ "reasoning": None,
"content": "\n\nNo thoughts, head empty!",
}
REASONING_ONLY_END_THINK = {
"output": f"The user is asking me not to think.{END_REASONING}No thoughts!",
- "reasoning_content": "The user is asking me not to think.",
+ "reasoning": "The user is asking me not to think.",
"content": "No thoughts!",
}
@@ -148,5 +148,5 @@ def test_reasoning(
reasoning_parser=parser, model_output=model_output, streaming=streaming
)
- assert reasoning == param_dict["reasoning_content"]
+ assert reasoning == param_dict["reasoning"]
assert content == param_dict["content"]
diff --git a/tests/reasoning/test_qwen3_reasoning_parser.py b/tests/reasoning/test_qwen3_reasoning_parser.py
index c06e40d72de2..92a8b6ab3761 100644
--- a/tests/reasoning/test_qwen3_reasoning_parser.py
+++ b/tests/reasoning/test_qwen3_reasoning_parser.py
@@ -22,47 +22,47 @@ def qwen3_tokenizer():
# with <think>, non-stream
WITH_THINK = {
"output": "This is a reasoning sectionThis is the rest",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
}
# with <think>, stream
WITH_THINK_STREAM = {
"output": "This is a reasoning sectionThis is the rest",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
}
# without <think>, non-stream
WITHOUT_THINK = {
"output": "This is the rest",
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is the rest",
}
# without <think>, stream
WITHOUT_THINK_STREAM = {
"output": "This is the rest",
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is the rest",
}
COMPLETE_REASONING = {
"output": "This is a reasoning section",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
}
MULTILINE_REASONING = {
"output": "This is a reasoning\nsectionThis is the rest\nThat",
- "reasoning_content": "This is a reasoning\nsection",
+ "reasoning": "This is a reasoning\nsection",
"content": "This is the rest\nThat",
}
ONLY_OPEN_TAG = {
"output": "This is a reasoning section",
- "reasoning_content": None,
+ "reasoning": None,
"content": "This is a reasoning section",
}
ONLY_OPEN_TAG_STREAM = {
"output": "This is a reasoning section",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
}
@@ -138,5 +138,5 @@ def test_reasoning(
parser, output_tokens, streaming=streaming
)
- assert reasoning == param_dict["reasoning_content"]
+ assert reasoning == param_dict["reasoning"]
assert content == param_dict["content"]
diff --git a/tests/reasoning/test_seedoss_reasoning_parser.py b/tests/reasoning/test_seedoss_reasoning_parser.py
index b356b8545f41..33d56d32965a 100644
--- a/tests/reasoning/test_seedoss_reasoning_parser.py
+++ b/tests/reasoning/test_seedoss_reasoning_parser.py
@@ -28,49 +28,49 @@ def seedoss_tokenizer():
SIMPLE_REASONING: dict[str, Any] = {
"output": "This is a reasoning sectionThis is the rest",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
COMPLETE_REASONING: dict[str, Any] = {
"output": "This is a reasoning section",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": True,
}
NO_CONTENT: dict[str, Any] = {
"output": "This is content",
- "reasoning_content": "This is content",
+ "reasoning": "This is content",
"content": None,
"is_reasoning_end": False,
}
NO_REASONING_STREAMING: dict[str, Any] = {
"output": "This is a reasoning section",
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": False,
}
MULTIPLE_LINES: dict[str, Any] = {
"output": "This\nThatThis is the rest\nThat",
- "reasoning_content": "This\nThat",
+ "reasoning": "This\nThat",
"content": "This is the rest\nThat",
"is_reasoning_end": True,
}
WITH_START_TOKEN: dict[str, Any] = {
"output": ("This is a reasoning sectionThis is the rest"),
- "reasoning_content": "This is a reasoning section",
+ "reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
ONLY_END_TOKEN: dict[str, Any] = {
"output": "Some reasoningThis is the rest",
- "reasoning_content": "Some reasoning",
+ "reasoning": "Some reasoning",
"content": "This is the rest",
"is_reasoning_end": True,
}
NO_TOKENS: dict[str, Any] = {
"output": "This is just content without any reasoning tokens",
- "reasoning_content": "This is just content without any reasoning tokens",
+ "reasoning": "This is just content without any reasoning tokens",
"content": None,
"is_reasoning_end": False,
}
@@ -95,7 +95,7 @@ def test_simple_reasoning(seedoss_tokenizer, streaming):
parser, [cast(str, SIMPLE_REASONING["output"])], streaming=streaming
)
- assert reasoning == SIMPLE_REASONING["reasoning_content"]
+ assert reasoning == SIMPLE_REASONING["reasoning"]
assert content == SIMPLE_REASONING["content"]
@@ -109,7 +109,7 @@ def test_complete_reasoning(seedoss_tokenizer, streaming):
parser, [cast(str, COMPLETE_REASONING["output"])], streaming=streaming
)
- assert reasoning == COMPLETE_REASONING["reasoning_content"]
+ assert reasoning == COMPLETE_REASONING["reasoning"]
assert content == COMPLETE_REASONING["content"]
@@ -123,7 +123,7 @@ def test_no_content(seedoss_tokenizer, streaming):
parser, [cast(str, NO_CONTENT["output"])], streaming=streaming
)
- assert reasoning == NO_CONTENT["reasoning_content"]
+ assert reasoning == NO_CONTENT["reasoning"]
assert content == NO_CONTENT["content"]
@@ -137,7 +137,7 @@ def test_multiple_lines(seedoss_tokenizer, streaming):
parser, [cast(str, MULTIPLE_LINES["output"])], streaming=streaming
)
- assert reasoning == MULTIPLE_LINES["reasoning_content"]
+ assert reasoning == MULTIPLE_LINES["reasoning"]
assert content == MULTIPLE_LINES["content"]
@@ -151,7 +151,7 @@ def test_with_start_token(seedoss_tokenizer, streaming):
parser, [cast(str, WITH_START_TOKEN["output"])], streaming=streaming
)
- assert reasoning == WITH_START_TOKEN["reasoning_content"]
+ assert reasoning == WITH_START_TOKEN["reasoning"]
assert content == WITH_START_TOKEN["content"]
@@ -168,7 +168,7 @@ def test_only_end_token(seedoss_tokenizer, streaming):
parser, [cast(str, ONLY_END_TOKEN["output"])], streaming=streaming
)
- assert reasoning == ONLY_END_TOKEN["reasoning_content"]
+ assert reasoning == ONLY_END_TOKEN["reasoning"]
assert content == ONLY_END_TOKEN["content"]
@@ -182,7 +182,7 @@ def test_no_tokens(seedoss_tokenizer, streaming):
parser, [cast(str, NO_TOKENS["output"])], streaming=streaming
)
- assert reasoning == NO_TOKENS["reasoning_content"]
+ assert reasoning == NO_TOKENS["reasoning"]
assert content == NO_TOKENS["content"]
diff --git a/tests/reasoning/utils.py b/tests/reasoning/utils.py
index ccd4ff8dd263..bd0b230a847c 100644
--- a/tests/reasoning/utils.py
+++ b/tests/reasoning/utils.py
@@ -9,25 +9,28 @@ from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
class StreamingReasoningReconstructor:
def __init__(self):
- self.reasoning_content = None
+ self.reasoning = None
self.other_content = None
def append_delta(self, delta: DeltaMessage):
# content and the reasoning content should not be present
# at the same time
- assert delta.content is None or delta.reasoning_content is None, (
+ assert delta.content is None or delta.reasoning is None, (
"Both content and reasoning content are present in the delta message"
)
+ assert delta.reasoning == delta.reasoning_content, (
+ "reasoning_content should be present for backwards compatibility"
+ )
if delta.content is not None:
if self.other_content is None:
self.other_content = delta.content
else:
self.other_content += delta.content
else:
- if self.reasoning_content is None:
- self.reasoning_content = delta.reasoning_content
+ if self.reasoning is None:
+ self.reasoning = delta.reasoning
else:
- self.reasoning_content += delta.reasoning_content
+ self.reasoning += delta.reasoning
def run_reasoning_extraction(
@@ -43,7 +46,7 @@ def run_reasoning_extraction(
request,
)
return (
- reconstructor.reasoning_content,
+ reconstructor.reasoning,
reconstructor.other_content or None,
)
else:
@@ -69,7 +72,7 @@ def run_reasoning_extraction_mistral(
request,
)
return (
- reconstructor.reasoning_content,
+ reconstructor.reasoning,
reconstructor.other_content or None,
)
else:
@@ -88,7 +91,7 @@ def run_reasoning_extraction_nonstreaming(
request: ChatCompletionRequest | None = None,
) -> tuple[str | None, str | None]:
request = request or ChatCompletionRequest(messages=[], model="test-model")
- return reasoning_parser.extract_reasoning_content(
+ return reasoning_parser.extract_reasoning(
model_output="".join(model_output), request=request
)
@@ -110,7 +113,7 @@ def run_reasoning_extraction_streaming(
]
current_text = previous_text + delta
current_tokens = previous_tokens + token_delta
- delta_message = reasoning_parser.extract_reasoning_content_streaming(
+ delta_message = reasoning_parser.extract_reasoning_streaming(
previous_text,
current_text,
delta,
@@ -142,7 +145,7 @@ def run_reasoning_extraction_streaming_mistral(
delta = reasoning_parser.model_tokenizer.convert_ids_to_tokens([model_delta])[0]
current_text = previous_text + delta
current_tokens = previous_tokens + token_delta
- delta_message = reasoning_parser.extract_reasoning_content_streaming(
+ delta_message = reasoning_parser.extract_reasoning_streaming(
previous_text,
current_text,
delta,
diff --git a/tests/tokenization/test_mistral_tokenizer.py b/tests/tokenization/test_mistral_tokenizer.py
index 926ad2503398..1ada8ee187c3 100644
--- a/tests/tokenization/test_mistral_tokenizer.py
+++ b/tests/tokenization/test_mistral_tokenizer.py
@@ -102,7 +102,7 @@ def test_prepare_apply_chat_template_tools_and_messages(
assert actual_request == expected_mistral_output
-# Tool use with list content and reasoning_content
+# Tool use with list content and reasoning
@pytest.mark.parametrize(
"openai_request,expected_mistral_output",
[
@@ -115,7 +115,7 @@ def test_prepare_apply_chat_template_tools_and_messages(
},
{
"role": "assistant",
- "reasoning_content": None,
+ "reasoning": None,
"content": None,
"tool_calls": [
{
diff --git a/tests/tool_use/test_ernie45_moe_tool_parser.py b/tests/tool_use/test_ernie45_moe_tool_parser.py
index fb5af6e13a96..36a07bb561d9 100644
--- a/tests/tool_use/test_ernie45_moe_tool_parser.py
+++ b/tests/tool_use/test_ernie45_moe_tool_parser.py
@@ -337,7 +337,7 @@ def test_extract_tool_calls_streaming_incremental(
if (
delta_message.role is None
and delta_message.content is None
- and delta_message.reasoning_content is None
+ and delta_message.reasoning is None
and len(delta_message.tool_calls) == 0
):
continue
diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index 676423f2ca91..4cd26e7b41d3 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -674,10 +674,10 @@ def test_structured_output_with_reasoning_matrices(
assert output is not None and isinstance(output, RequestOutput)
prompt = output.prompt
generated_text = output.outputs[0].text
- reasoning_content, content = run_reasoning_extraction(reasoner, [generated_text])
- print(f"Prompt: {prompt!r}\nReasoning: {reasoning_content!r}\nContent: {content!r}")
+ reasoning, content = run_reasoning_extraction(reasoner, [generated_text])
+ print(f"Prompt: {prompt!r}\nReasoning: {reasoning!r}\nContent: {content!r}")
- assert content is not None and reasoning_content is not None
+ assert content is not None and reasoning is not None
output_json = json.loads(content)
jsonschema.validate(instance=output_json, schema=reasoning_schema)
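The final assertions of this test reduce to a small standalone check; the schema below is a hypothetical stand-in for the test's `reasoning_schema`:

```python
# Sketch of the structured-output check above. The schema is a
# hypothetical stand-in, not the one the test actually uses.
import json

import jsonschema

reasoning_schema = {
    "type": "object",
    "properties": {"answer": {"type": "integer"}},
    "required": ["answer"],
}
content = '{"answer": 4}'  # the non-reasoning part of the model output
jsonschema.validate(instance=json.loads(content), schema=reasoning_schema)
```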
diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py
index 7958d0317739..47a252348c10 100644
--- a/vllm/entrypoints/harmony_utils.py
+++ b/vllm/entrypoints/harmony_utils.py
@@ -521,15 +521,15 @@ def parse_chat_output(
is_tool_call = False # TODO: update this when tool call is supported
if len(output_msgs) == 0:
# The generation has stopped during reasoning.
- reasoning_content = parser.current_content
+ reasoning = parser.current_content
final_content = None
elif len(output_msgs) == 1:
# The generation has stopped during final message.
- reasoning_content = output_msgs[0].content[0].text
+ reasoning = output_msgs[0].content[0].text
final_content = parser.current_content
else:
reasoning_msg = output_msgs[:-1]
final_msg = output_msgs[-1]
- reasoning_content = "\n".join([msg.content[0].text for msg in reasoning_msg])
+ reasoning = "\n".join([msg.content[0].text for msg in reasoning_msg])
final_content = final_msg.content[0].text
- return reasoning_content, final_content, is_tool_call
+ return reasoning, final_content, is_tool_call
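The three-way branch in `parse_chat_output` can be summarized in a short sketch (simplified: the real function walks Harmony message objects, not plain strings):

```python
# Sketch of the branch above: zero completed output messages means the
# generation stopped mid-reasoning; one means it stopped mid-final
# message; otherwise everything but the last message is reasoning.
def split_messages(msgs: list[str], in_progress: str):
    if len(msgs) == 0:
        return in_progress, None           # stopped during reasoning
    if len(msgs) == 1:
        return msgs[0], in_progress        # stopped during final message
    return "\n".join(msgs[:-1]), msgs[-1]  # finished cleanly


assert split_messages([], "thin") == ("thin", None)
assert split_messages(["thought"], "ans") == ("thought", "ans")
assert split_messages(["t1", "t2", "answer"], "") == ("t1\nt2", "answer")
```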
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index cf80c4fccbad..69e757d4764d 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -2102,7 +2102,15 @@ class ChatMessage(OpenAIBaseModel):
tool_calls: list[ToolCall] = Field(default_factory=list)
# vLLM-specific fields that are not in OpenAI spec
+ reasoning: str | None = None
reasoning_content: str | None = None
+ """Deprecated: use `reasoning` instead."""
+
+ @model_validator(mode="after")
+ def handle_deprecated_reasoning_content(self):
+ """Copy reasoning to reasoning_content for backward compatibility."""
+ self.reasoning_content = self.reasoning
+ return self
class ChatCompletionLogProb(OpenAIBaseModel):
@@ -2156,9 +2164,17 @@ class ChatCompletionResponse(OpenAIBaseModel):
class DeltaMessage(OpenAIBaseModel):
role: str | None = None
content: str | None = None
+ reasoning: str | None = None
reasoning_content: str | None = None
+ """Deprecated: use `reasoning` instead."""
tool_calls: list[DeltaToolCall] = Field(default_factory=list)
+ @model_validator(mode="after")
+ def handle_deprecated_reasoning_content(self):
+ """Copy reasoning to reasoning_content for backward compatibility."""
+ self.reasoning_content = self.reasoning
+ return self
+
class ChatCompletionResponseStreamChoice(OpenAIBaseModel):
index: int
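The validator added here is a one-way copy: `reasoning` is authoritative and overwrites `reasoning_content` on every validation. A minimal sketch in plain pydantic (outside vLLM) shows the behavior, including the consequence that a caller setting only the deprecated field sees it cleared, which is why all internal constructors are migrated in this same change:

```python
# Plain-pydantic sketch of the aliasing performed by the validators above.
from pydantic import BaseModel, model_validator


class Message(BaseModel):
    content: str | None = None
    reasoning: str | None = None
    reasoning_content: str | None = None  # deprecated alias

    @model_validator(mode="after")
    def mirror_deprecated_field(self):
        self.reasoning_content = self.reasoning
        return self


m = Message(content="4", reasoning="2 + 2 = 4")
assert m.reasoning_content == m.reasoning  # old readers keep working

legacy = Message(reasoning_content="old-style")
assert legacy.reasoning_content is None  # the one-way copy clears it
```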
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 888aa4eb6fa8..59e1c8d53179 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -759,9 +759,7 @@ class OpenAIServingChat(OpenAIServing):
delta_message = DeltaMessage(content=delta_text)
elif cur_channel == "analysis":
if request.include_reasoning:
- delta_message = DeltaMessage(
- reasoning_content=delta_text
- )
+ delta_message = DeltaMessage(reasoning=delta_text)
else:
delta_message = None
elif (
@@ -823,7 +821,7 @@ class OpenAIServingChat(OpenAIServing):
):
assert reasoning_parser is not None
delta_message = (
- reasoning_parser.extract_reasoning_content_streaming(
+ reasoning_parser.extract_reasoning_streaming(
previous_text,
current_text,
delta_text,
@@ -836,7 +834,7 @@ class OpenAIServingChat(OpenAIServing):
# or think end id in prompt_token_ids
# i.e {"enable_thinking": False},
# set reasoning status to end.
- # Only keep 'content', remove 'reasoning_content'.
+ # Only keep 'content', remove 'reasoning'.
if reasoning_parser.is_reasoning_end(
as_list(output.token_ids)
) or (
@@ -899,7 +897,7 @@ class OpenAIServingChat(OpenAIServing):
if self.reasoning_parser and not reasoning_end_arr[i]:
delta_message = (
- reasoning_parser.extract_reasoning_content_streaming(
+ reasoning_parser.extract_reasoning_streaming(
previous_text,
current_text,
delta_text,
@@ -948,7 +946,7 @@ class OpenAIServingChat(OpenAIServing):
output_token_ids = as_list(output.token_ids)
if not reasoning_end_arr[i]:
delta_message = (
- reasoning_parser.extract_reasoning_content_streaming(
+ reasoning_parser.extract_reasoning_streaming(
previous_text,
current_text,
delta_text,
@@ -961,7 +959,7 @@ class OpenAIServingChat(OpenAIServing):
# i.e {"enable_thinking": False},
# set reasoning status to end.
# Remove the text and token ids related
- # to 'reasoning_content'.
+ # to 'reasoning'.
if (
res.prompt_token_ids
and reasoning_parser.is_reasoning_end(
@@ -978,7 +976,7 @@ class OpenAIServingChat(OpenAIServing):
# When encountering think end id in delta_token_ids,
# set reasoning status to end.
# Remove the text and token ids related
- # to 'reasoning_content'.
+ # to 'reasoning'.
if reasoning_parser.is_reasoning_end(output_token_ids):
reasoning_end_arr[i] = True
current_token_ids = (
@@ -1033,15 +1031,13 @@ class OpenAIServingChat(OpenAIServing):
# when only reasoning
elif self.reasoning_parser:
- delta_message = (
- reasoning_parser.extract_reasoning_content_streaming(
- previous_text,
- current_text,
- delta_text,
- previous_token_ids,
- current_token_ids,
- output.token_ids,
- )
+ delta_message = reasoning_parser.extract_reasoning_streaming(
+ previous_text,
+ current_text,
+ delta_text,
+ previous_token_ids,
+ current_token_ids,
+ output.token_ids,
)
# handle streaming just a content delta
else:
@@ -1334,9 +1330,9 @@ class OpenAIServingChat(OpenAIServing):
logprobs = None
if self.use_harmony:
- reasoning_content, content, _ = parse_chat_output(token_ids)
+ reasoning, content, _ = parse_chat_output(token_ids)
if not request.include_reasoning:
- reasoning_content = None
+ reasoning = None
if self.tool_parser is not None:
tool_parser = self.tool_parser(tokenizer)
@@ -1349,14 +1345,14 @@ class OpenAIServingChat(OpenAIServing):
content = tool_call_info.content
message = ChatMessage(
role=role,
- reasoning_content=reasoning_content,
+ reasoning=reasoning,
content=content,
tool_calls=tool_call_info.tool_calls,
)
else:
message = ChatMessage(
role=role,
- reasoning_content=reasoning_content,
+ reasoning=reasoning,
content=content,
)
@@ -1390,13 +1386,13 @@ class OpenAIServingChat(OpenAIServing):
return self.create_error_response(str(e))
# If the reasoning parser is enabled,
# tool calls are extracted exclusively from the content.
- reasoning_content, content = reasoning_parser.extract_reasoning_content(
+ reasoning, content = reasoning_parser.extract_reasoning(
output.text, request=request
)
if not request.include_reasoning:
- reasoning_content = None
+ reasoning = None
else:
- reasoning_content = None
+ reasoning = None
content = output.text
auto_tools_called = False
@@ -1416,9 +1412,7 @@ class OpenAIServingChat(OpenAIServing):
not isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
and request.tool_choice != "required"
):
- message = ChatMessage(
- role=role, reasoning_content=reasoning_content, content=content
- )
+ message = ChatMessage(role=role, reasoning=reasoning, content=content)
# if the request uses tools and specified a tool choice
elif (
@@ -1428,7 +1422,7 @@ class OpenAIServingChat(OpenAIServing):
assert tool_calls is not None and len(tool_calls) > 0
message = ChatMessage(
role=role,
- reasoning_content=reasoning_content,
+ reasoning=reasoning,
content="",
tool_calls=[tool_call_class(function=tc) for tc in tool_calls],
)
@@ -1452,15 +1446,13 @@ class OpenAIServingChat(OpenAIServing):
role=role,
content="",
tool_calls=tool_call_class_items,
- reasoning_content=reasoning_content,
+ reasoning=reasoning,
)
# if the request doesn't use tool choice
# OR specifies to not use a tool
elif not request.tool_choice or request.tool_choice == "none":
- message = ChatMessage(
- role=role, reasoning_content=reasoning_content, content=content
- )
+ message = ChatMessage(role=role, reasoning=reasoning, content=content)
# handle when there are tools and tool choice is auto
elif (
@@ -1476,7 +1468,7 @@ class OpenAIServingChat(OpenAIServing):
if tool_calls:
message = ChatMessage(
role=role,
- reasoning_content=reasoning_content,
+ reasoning=reasoning,
content=content,
tool_calls=[
ToolCall(
@@ -1498,7 +1490,7 @@ class OpenAIServingChat(OpenAIServing):
ret_content = content
message = ChatMessage(
role=role,
- reasoning_content=reasoning_content,
+ reasoning=reasoning,
content=ret_content,
)
@@ -1509,9 +1501,7 @@ class OpenAIServingChat(OpenAIServing):
" if tools should be extracted. Returning a standard chat "
"completion."
)
- message = ChatMessage(
- role=role, reasoning_content=reasoning_content, content=content
- )
+ message = ChatMessage(role=role, reasoning=reasoning, content=content)
# In OpenAI's API, when a tool is called, the finish_reason is:
# "tool_calls" for "auto" or "required" tool calls,
# and "stop" for named tool calls.
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index b6fef7d2fafd..9b79e50c3208 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -778,11 +778,11 @@ class OpenAIServingResponses(OpenAIServing):
logger.exception("Error in reasoning parser creation.")
raise e
- reasoning_content, content = reasoning_parser.extract_reasoning_content(
+ reasoning, content = reasoning_parser.extract_reasoning(
final_output.text, request=request
)
else:
- reasoning_content = None
+ reasoning = None
content = final_output.text
# Log complete response if output logging is enabled
@@ -790,8 +790,8 @@ class OpenAIServingResponses(OpenAIServing):
output_text = ""
if content:
output_text = content
- elif reasoning_content:
- output_text = f"[reasoning: {reasoning_content}]"
+ elif reasoning:
+ output_text = f"[reasoning: {reasoning}]"
if output_text:
self.request_logger.log_outputs(
@@ -805,15 +805,13 @@ class OpenAIServingResponses(OpenAIServing):
reasoning_item = None
message_item = None
- if reasoning_content:
+ if reasoning:
reasoning_item = ResponseReasoningItem(
id=f"rs_{random_uuid()}",
summary=[],
type="reasoning",
content=[
- ResponseReasoningTextContent(
- text=reasoning_content, type="reasoning_text"
- )
+ ResponseReasoningTextContent(text=reasoning, type="reasoning_text")
],
status=None, # NOTE: Only the last output item has status.
)
@@ -1208,15 +1206,13 @@ class OpenAIServingResponses(OpenAIServing):
if ctx.last_output.outputs:
output = ctx.last_output.outputs[0]
if reasoning_parser:
- delta_message = (
- reasoning_parser.extract_reasoning_content_streaming(
- previous_text=previous_text,
- current_text=previous_text + output.text,
- delta_text=output.text,
- previous_token_ids=previous_token_ids,
- current_token_ids=previous_token_ids + output.token_ids,
- delta_token_ids=output.token_ids,
- )
+ delta_message = reasoning_parser.extract_reasoning_streaming(
+ previous_text=previous_text,
+ current_text=previous_text + output.text,
+ delta_text=output.text,
+ previous_token_ids=previous_token_ids,
+ current_token_ids=previous_token_ids + output.token_ids,
+ delta_token_ids=output.token_ids,
)
else:
delta_message = DeltaMessage(
@@ -1228,7 +1224,7 @@ class OpenAIServingResponses(OpenAIServing):
continue
if not first_delta_sent:
current_item_id = str(uuid.uuid4())
- if delta_message.reasoning_content:
+ if delta_message.reasoning:
yield _increment_sequence_number_and_return(
ResponseOutputItemAddedEvent(
type="response.output_item.added",
@@ -1280,15 +1276,15 @@ class OpenAIServingResponses(OpenAIServing):
# same as content or reasoning content
if (
previous_delta_messages
- and previous_delta_messages[-1].reasoning_content is not None
+ and previous_delta_messages[-1].reasoning is not None
and delta_message.content is not None
):
# from reasoning to normal content, send done
# event for reasoning
reason_content = "".join(
- pm.reasoning_content
+ pm.reasoning
for pm in previous_delta_messages
- if pm.reasoning_content is not None
+ if pm.reasoning is not None
)
yield _increment_sequence_number_and_return(
ResponseReasoningTextDoneEvent(
@@ -1356,7 +1352,7 @@ class OpenAIServingResponses(OpenAIServing):
# reset previous delta messages
previous_delta_messages = []
- if delta_message.reasoning_content is not None:
+ if delta_message.reasoning is not None:
yield _increment_sequence_number_and_return(
ResponseReasoningTextDeltaEvent(
type="response.reasoning_text.delta",
@@ -1364,7 +1360,7 @@ class OpenAIServingResponses(OpenAIServing):
content_index=current_content_index,
output_index=current_output_index,
item_id=current_item_id,
- delta=delta_message.reasoning_content,
+ delta=delta_message.reasoning,
)
)
elif delta_message.content is not None:
@@ -1392,11 +1388,11 @@ class OpenAIServingResponses(OpenAIServing):
previous_delta_messages.append(delta_message)
if previous_delta_messages:
- if previous_delta_messages[-1].reasoning_content is not None:
+ if previous_delta_messages[-1].reasoning is not None:
reason_content = "".join(
- pm.reasoning_content
+ pm.reasoning
for pm in previous_delta_messages
- if pm.reasoning_content is not None
+ if pm.reasoning is not None
)
yield _increment_sequence_number_and_return(
ResponseReasoningTextDoneEvent(
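When the stream flips from reasoning to normal content, the done event needs the complete reasoning text, which is re-joined from the buffered deltas, as in this sketch:

```python
# Sketch of the re-join above; dicts stand in for DeltaMessage objects.
previous_delta_messages = [
    {"reasoning": "let"},
    {"reasoning": " me think"},
    {"content": "42"},
]
reason_content = "".join(
    pm["reasoning"]
    for pm in previous_delta_messages
    if pm.get("reasoning") is not None
)
assert reason_content == "let me think"
```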
diff --git a/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
index cf2fa30d0154..432c419db189 100644
--- a/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
@@ -279,7 +279,7 @@ class StreamingXMLToolCallParser:
final_delta = DeltaMessage(
role=None,
content=None,
- reasoning_content=None,
+ reasoning=None,
tool_calls=[
DeltaToolCall(
index=self.tool_call_index - 1,
diff --git a/vllm/reasoning/abs_reasoning_parsers.py b/vllm/reasoning/abs_reasoning_parsers.py
index 63ff450053ea..d26e4ffc9c16 100644
--- a/vllm/reasoning/abs_reasoning_parsers.py
+++ b/vllm/reasoning/abs_reasoning_parsers.py
@@ -76,7 +76,7 @@ class ReasoningParser:
"""
@abstractmethod
- def extract_reasoning_content(
+ def extract_reasoning(
self,
model_output: str,
request: ChatCompletionRequest | ResponsesRequest,
@@ -100,7 +100,7 @@ class ReasoningParser:
"""
@abstractmethod
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
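The renamed abstract interface, reduced to its shape (a simplified sketch; the real methods also receive token-ID sequences and a request object, and return `DeltaMessage` rather than a dict):

```python
# Simplified sketch of the renamed parser interface.
from abc import ABC, abstractmethod


class ReasoningParserSketch(ABC):
    @abstractmethod
    def extract_reasoning(self, model_output: str) -> tuple[str | None, str | None]:
        """Split a complete generation into (reasoning, content)."""

    @abstractmethod
    def extract_reasoning_streaming(
        self, previous_text: str, current_text: str, delta_text: str
    ) -> dict | None:
        """Classify one streamed delta as reasoning and/or content."""
```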
diff --git a/vllm/reasoning/basic_parsers.py b/vllm/reasoning/basic_parsers.py
index 5fb3c8d368a8..026894773272 100644
--- a/vllm/reasoning/basic_parsers.py
+++ b/vllm/reasoning/basic_parsers.py
@@ -76,7 +76,7 @@ class BaseThinkingReasoningParser(ReasoningParser):
else:
return input_ids[input_ids.index(self.end_token_id) + 1 :]
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -103,11 +103,10 @@ class BaseThinkingReasoningParser(ReasoningParser):
# start token in previous, end token in delta,
# extract reasoning content
end_index = delta_text.find(self.end_token)
- reasoning_content = delta_text[:end_index]
+ reasoning = delta_text[:end_index]
content = delta_text[end_index + len(self.end_token) :]
return DeltaMessage(
- reasoning_content=reasoning_content,
- content=content if content else None,
+ reasoning=reasoning, content=content if content else None
)
elif self.end_token_id in previous_token_ids:
# start token in previous, end token in previous,
@@ -116,30 +115,27 @@ class BaseThinkingReasoningParser(ReasoningParser):
else:
# start token in previous, no end token in previous or delta,
# reasoning content continues
- return DeltaMessage(reasoning_content=delta_text)
+ return DeltaMessage(reasoning=delta_text)
elif self.start_token_id in delta_token_ids:
if self.end_token_id in delta_token_ids:
# start token in delta, end token in delta,
# extract reasoning content
start_index = delta_text.find(self.start_token)
end_index = delta_text.find(self.end_token)
- reasoning_content = delta_text[
- start_index + len(self.start_token) : end_index
- ]
+ reasoning = delta_text[start_index + len(self.start_token) : end_index]
content = delta_text[end_index + len(self.end_token) :]
return DeltaMessage(
- reasoning_content=reasoning_content,
- content=content if content else None,
+ reasoning=reasoning, content=content if content else None
)
else:
# start token in delta, no end token in delta,
# reasoning content continues
- return DeltaMessage(reasoning_content=delta_text)
+ return DeltaMessage(reasoning=delta_text)
else:
# thinking start token not found
return DeltaMessage(content=delta_text)
- def extract_reasoning_content(
+ def extract_reasoning(
self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
) -> tuple[str | None, str | None]:
"""
@@ -160,7 +156,7 @@ class BaseThinkingReasoningParser(ReasoningParser):
if self.end_token not in model_output:
return model_output, None
else:
- reasoning_content, _, content = model_output.partition(self.end_token)
+ reasoning, _, content = model_output.partition(self.end_token)
# If generation stops right after end-of-think, return null content
final_content = content or None
- return reasoning_content, final_content
+ return reasoning, final_content
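The non-streaming path is a single `str.partition` on the end token. A quick check of the edge cases, assuming `</think>` as the end token:

```python
# Standalone check of the partition logic above, assuming </think>.
end_token = "</think>"


def extract(model_output: str) -> tuple[str | None, str | None]:
    if end_token not in model_output:
        return model_output, None
    reasoning, _, content = model_output.partition(end_token)
    return reasoning, content or None  # null content if nothing follows


assert extract("abc</think>xyz") == ("abc", "xyz")
assert extract("abc</think>") == ("abc", None)  # stopped right at end-of-think
assert extract("abc") == ("abc", None)          # think block never closed
```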
diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index ad4e0fe6c9ce..a91c8ceeb625 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -25,7 +25,7 @@ class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
"""The token that ends reasoning content."""
return ""
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -34,7 +34,7 @@ class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
current_token_ids: Sequence[int],
delta_token_ids: Sequence[int],
) -> DeltaMessage | None:
- ret = super().extract_reasoning_content_streaming(
+ ret = super().extract_reasoning_streaming(
previous_text,
current_text,
delta_text,
@@ -51,10 +51,10 @@ class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
# end token in delta with more tokens,
# extract reasoning content and content
end_index = delta_text.find(self.end_token)
- reasoning_content = delta_text[:end_index]
+ reasoning = delta_text[:end_index]
content = delta_text[end_index + len(self.end_token) :]
return DeltaMessage(
- reasoning_content=reasoning_content,
+ reasoning=reasoning,
content=content if content else None,
)
elif self.end_token_id in previous_token_ids:
@@ -62,6 +62,6 @@ class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
return DeltaMessage(content=delta_text)
else:
# no end token in previous or delta, reasoning content continues
- return DeltaMessage(reasoning_content=delta_text)
+ return DeltaMessage(reasoning=delta_text)
return ret
diff --git a/vllm/reasoning/deepseek_v3_reasoning_parser.py b/vllm/reasoning/deepseek_v3_reasoning_parser.py
index 81f6e1f32eb3..afdf73262aca 100644
--- a/vllm/reasoning/deepseek_v3_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_v3_reasoning_parser.py
@@ -38,12 +38,12 @@ class DeepSeekV3ReasoningParser(ReasoningParser):
def extract_content_ids(self, input_ids: list[int]) -> list[int]:
return self._parser.extract_content_ids(input_ids)
- def extract_reasoning_content(
+ def extract_reasoning(
self, model_output: str, request: ChatCompletionRequest
) -> tuple[str | None, str | None]:
- return self._parser.extract_reasoning_content(model_output, request)
+ return self._parser.extract_reasoning(model_output, request)
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -52,7 +52,7 @@ class DeepSeekV3ReasoningParser(ReasoningParser):
current_token_ids: Sequence[int],
delta_token_ids: Sequence[int],
) -> DeltaMessage | None:
- return self._parser.extract_reasoning_content_streaming(
+ return self._parser.extract_reasoning_streaming(
previous_text,
current_text,
delta_text,
diff --git a/vllm/reasoning/ernie45_reasoning_parser.py b/vllm/reasoning/ernie45_reasoning_parser.py
index 11dc1d10f73e..3cdbf14858ec 100644
--- a/vllm/reasoning/ernie45_reasoning_parser.py
+++ b/vllm/reasoning/ernie45_reasoning_parser.py
@@ -57,7 +57,7 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
"tokens in the tokenizer!"
)
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -73,7 +73,7 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
The Ernie45 thinking model output format is
abc\n</think>\n\n<response>\ndef\n</response>\n
or abc\n</think>\ndef
- - 'abc' goes to reasoning_content
+ - 'abc' goes to reasoning
- 'def' goes to content
"""
# Skip single special tokens
@@ -94,7 +94,7 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
# </think> in delta with more tokens,
# extract reasoning content and content
think_end_index = delta_text.find(self.end_token)
- reasoning_content = delta_text[:think_end_index]
+ reasoning = delta_text[:think_end_index]
content = delta_text[think_end_index + len(self.end_token) :]
content = content.lstrip("\n")
response_start_idx = content.find(self.response_start_token)
@@ -104,7 +104,7 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
if response_end_idx != -1:
content = content[:response_end_idx]
return DeltaMessage(
- reasoning_content=reasoning_content,
+ reasoning=reasoning,
content=content if content else None,
)
elif self.end_token_id in previous_token_ids:
@@ -138,9 +138,9 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
return DeltaMessage(content=content if content else None)
else:
# no </think> in previous or delta, reasoning content continues
- return DeltaMessage(reasoning_content=delta_text)
+ return DeltaMessage(reasoning=delta_text)
- def extract_reasoning_content(
+ def extract_reasoning(
self, model_output: str, request: ChatCompletionRequest
) -> tuple[str | None, str | None]:
"""
@@ -148,14 +148,12 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
The Ernie45 thinking model output format is
abc\n</think>\n\n\n<response>\ndef\n</response>\n
or abc\n</think>\ndef
- - 'abc' goes to reasoning_content
+ - 'abc' goes to reasoning
- 'def' goes to content
Returns:
tuple[Optional[str], Optional[str]]: reasoning content and content
"""
- reasoning_content, content = super().extract_reasoning_content(
- model_output, request
- )
+ reasoning, content = super().extract_reasoning(model_output, request)
if content:
start_idx = content.find(self.response_start_token)
end_idx = content.rfind(self.response_end_token)
@@ -164,4 +162,4 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
content = content[start_idx + len(self.response_start_token) : end_idx]
final_content = content or None
- return reasoning_content, final_content
+ return reasoning, final_content
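With the tags restored above (an assumption based on the stripped docstring), the Ernie-specific post-processing additionally unwraps `<response>...</response>` from the content half:

```python
# Sketch of the <response> unwrapping; tag literals are assumed from
# the restored docstring, not verified against the tokenizer.
RESP_START, RESP_END = "<response>", "</response>"

content = "\n<response>\ndef\n</response>\n"
start_idx = content.find(RESP_START)
end_idx = content.rfind(RESP_END)
if start_idx != -1 and end_idx != -1 and start_idx < end_idx:
    content = content[start_idx + len(RESP_START):end_idx]
assert content.strip("\n") == "def"
```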
diff --git a/vllm/reasoning/glm4_moe_reasoning_parser.py b/vllm/reasoning/glm4_moe_reasoning_parser.py
index d43fa7700799..1871adcd4321 100644
--- a/vllm/reasoning/glm4_moe_reasoning_parser.py
+++ b/vllm/reasoning/glm4_moe_reasoning_parser.py
@@ -70,7 +70,7 @@ class Glm4MoeModelReasoningParser(ReasoningParser):
else:
return input_ids[input_ids.index(self.think_end_token_id) + 1 :]
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -84,7 +84,7 @@ class Glm4MoeModelReasoningParser(ReasoningParser):
Handles streaming output where previous + delta = current.
Uses token IDs for faster processing.
For text <think>abc</think>xyz:
- - 'abc' goes to reasoning_content
+ - 'abc' goes to reasoning
- 'xyz' goes to content
"""
# Skip single special tokens
@@ -98,10 +98,10 @@ class Glm4MoeModelReasoningParser(ReasoningParser):
# <think> in previous, </think> in delta,
# extract reasoning content
end_index = delta_text.find(self.think_end_token)
- reasoning_content = delta_text[:end_index]
+ reasoning = delta_text[:end_index]
content = delta_text[end_index + len(self.think_end_token) :]
return DeltaMessage(
- reasoning_content=reasoning_content,
+ reasoning=reasoning,
content=content if content else None,
)
elif self.think_end_token_id in previous_token_ids:
@@ -111,36 +111,36 @@ class Glm4MoeModelReasoningParser(ReasoningParser):
else:
# <think> in previous, no </think> in previous or delta,
# reasoning content continues
- return DeltaMessage(reasoning_content=delta_text)
+ return DeltaMessage(reasoning=delta_text)
elif self.think_start_token_id in delta_token_ids:
if self.think_end_token_id in delta_token_ids:
# <think> in delta, </think> in delta, extract reasoning content
start_index = delta_text.find(self.think_start_token)
end_index = delta_text.find(self.think_end_token)
- reasoning_content = delta_text[
+ reasoning = delta_text[
start_index + len(self.think_start_token) : end_index
]
content = delta_text[end_index + len(self.think_end_token) :]
return DeltaMessage(
- reasoning_content=reasoning_content,
+ reasoning=reasoning,
content=content if content else None,
)
else:
# <think> in delta, no </think> in delta,
# reasoning content continues
- return DeltaMessage(reasoning_content=delta_text)
+ return DeltaMessage(reasoning=delta_text)
else:
# thinking is disabled, just content
return DeltaMessage(content=delta_text)
- def extract_reasoning_content(
+ def extract_reasoning(
self, model_output: str, request: ChatCompletionRequest
) -> tuple[str | None, str | None]:
"""
Extract reasoning content from the model output.
For text <think>abc</think>xyz:
- - 'abc' goes to reasoning_content
+ - 'abc' goes to reasoning
- 'xyz' goes to content
Returns:
@@ -165,7 +165,7 @@ class Glm4MoeModelReasoningParser(ReasoningParser):
return None, model_output
# Extract reasoning content from the model output.
- reasoning_content, _, content = model_output.partition(self.think_end_token)
+ reasoning, _, content = model_output.partition(self.think_end_token)
final_content = content or None
- return reasoning_content, final_content
+ return reasoning, final_content
diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py
index e720f5228d0f..0c1b54d0bd35 100644
--- a/vllm/reasoning/gptoss_reasoning_parser.py
+++ b/vllm/reasoning/gptoss_reasoning_parser.py
@@ -104,7 +104,7 @@ class GptOssReasoningParser(ReasoningParser):
return []
return self.model_tokenizer.encode(content)
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -131,9 +131,9 @@ class GptOssReasoningParser(ReasoningParser):
content_delta = cur_content
if reasoning_delta is None and content_delta is None:
return None
- return DeltaMessage(reasoning_content=reasoning_delta, content=content_delta)
+ return DeltaMessage(reasoning=reasoning_delta, content=content_delta)
- def extract_reasoning_content(
+ def extract_reasoning(
self,
model_output: str,
request: ChatCompletionRequest,
diff --git a/vllm/reasoning/granite_reasoning_parser.py b/vllm/reasoning/granite_reasoning_parser.py
index eae6c2f5c7b3..484045d66a3c 100644
--- a/vllm/reasoning/granite_reasoning_parser.py
+++ b/vllm/reasoning/granite_reasoning_parser.py
@@ -49,7 +49,7 @@ class GraniteReasoningParser(ReasoningParser):
len(think_start) for think_start in self.valid_think_starts
)
- def extract_reasoning_content(
+ def extract_reasoning(
self, model_output: str, request: ChatCompletionRequest
) -> tuple[str | None, str | None]:
"""Extract the reasoning content & content sections, respectively.
@@ -67,12 +67,12 @@ class GraniteReasoningParser(ReasoningParser):
re_match = self.reasoning_regex.findall(model_output)
if not re_match:
return None, model_output
- reasoning_content, response_content = re_match[0]
+ reasoning, response_content = re_match[0]
if not response_content:
- return reasoning_content, None
- return reasoning_content, response_content
+ return reasoning, None
+ return reasoning, response_content
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -107,12 +107,10 @@ class GraniteReasoningParser(ReasoningParser):
Union[DeltaMessage, None]
DeltaMessage with either reasoning content or content, or None.
"""
- reasoning_content, resp_seq_len, content = self._get_content_sections(
- current_text
- )
+ reasoning, resp_seq_len, content = self._get_content_sections(current_text)
# Either we haven't finished the start of the reasoning sequence,
# or the model is generating something unexpected.
- if not reasoning_content:
+ if not reasoning:
delta_message = self._get_delta_message_with_no_reasoning_bounds(
current_text, delta_text
)
@@ -120,16 +118,16 @@ class GraniteReasoningParser(ReasoningParser):
# the start of response sequence.
elif not content:
delta_message = self._get_delta_message_with_no_response_bounds(
- current_text, reasoning_content, delta_text
+ current_text, reasoning, delta_text
)
# We've finished both the start of reasoning and start of response seq.
else:
# This should never happen since we matched on the response
assert resp_seq_len is not None
delta_message = self._get_delta_message_with_both_bounds(
- delta_text, reasoning_content, content, current_text, resp_seq_len
+ delta_text, reasoning, content, current_text, resp_seq_len
)
- if not delta_message.content and not delta_message.reasoning_content:
+ if not delta_message.content and not delta_message.reasoning:
return None
return delta_message
@@ -185,20 +183,20 @@ class GraniteReasoningParser(ReasoningParser):
# message and append everything to content in the future.
if was_substr and not is_substr:
return DeltaMessage(
- reasoning_content=None,
+ reasoning=None,
content=current_text,
)
if is_substr:
# Might still be in the special token sequence; return nothing
- return DeltaMessage(reasoning_content=None, content=None)
+ return DeltaMessage(reasoning=None, content=None)
# Otherwise the sequence has already been broken and we already
# corrected; just return the delta text as normal content.
- return DeltaMessage(reasoning_content=None, content=delta_text)
+ return DeltaMessage(reasoning=None, content=delta_text)
def _get_delta_message_with_no_response_bounds(
self,
current_text: str,
- reasoning_content: str,
+ reasoning: str,
delta_text: str,
) -> DeltaMessage:
"""Parse the delta message when the current text has both reasoning
@@ -208,7 +206,7 @@ class GraniteReasoningParser(ReasoningParser):
Args:
current_text (str): The full previous + delta text.
- reasoning_content (str): reasoning content from current_text.
+ reasoning (str): reasoning content from current_text.
delta_text (str): Text to consider and parse content from.
Returns:
@@ -222,12 +220,12 @@ class GraniteReasoningParser(ReasoningParser):
current_text.endswith(response_start)
for response_start in self.valid_response_starts
)
- if reasoning_content is None or ends_with_start_response_seq:
- return DeltaMessage(reasoning_content=None, content=None)
+ if reasoning is None or ends_with_start_response_seq:
+ return DeltaMessage(reasoning=None, content=None)
# Consider previous / current text only within context of the reasoning
- previous_text = reasoning_content[: -len(delta_text)]
- current_text = reasoning_content
+ previous_text = reasoning[: -len(delta_text)]
+ current_text = reasoning
# We need to be careful about adding unfinished response sequences;
# Find the place at which we MIGHT be starting a response sequence
@@ -253,32 +251,30 @@ class GraniteReasoningParser(ReasoningParser):
# Delta only contains potential continued response sequence text.
if delta_continues_substr:
- return DeltaMessage(reasoning_content=None, content=None)
+ return DeltaMessage(reasoning=None, content=None)
if not prev_was_substr:
# Delta may be starting a new response seq but has other text too.
if delta_new_substr:
- return DeltaMessage(
- reasoning_content=delta_text[:delta_idx], content=None
- )
+ return DeltaMessage(reasoning=delta_text[:delta_idx], content=None)
# Normal case for most reasoning text (no potential special seqs).
- return DeltaMessage(reasoning_content=delta_text, content=None)
+ return DeltaMessage(reasoning=delta_text, content=None)
# The substring that previously seemed to be a potential response
# seq wasn't one; we need to add the content to the delta message,
# and also slice off the potential response sequence
elif delta_new_substr:
- reasoning_content = previous_text[prev_idx:] + delta_text[:delta_idx]
- return DeltaMessage(reasoning_content=reasoning_content, content=None)
+ reasoning = previous_text[prev_idx:] + delta_text[:delta_idx]
+ return DeltaMessage(reasoning=reasoning, content=None)
# No new substring yet, and we broke our old one; take the whole delta
return DeltaMessage(
- reasoning_content=previous_text[prev_idx:] + delta_text,
+ reasoning=previous_text[prev_idx:] + delta_text,
content=None,
)
def _get_delta_message_with_both_bounds(
self,
delta_text: str,
- reasoning_content: str,
+ reasoning: str,
response_content: str,
current_text: str,
response_seq_len: int,
@@ -288,7 +284,7 @@ class GraniteReasoningParser(ReasoningParser):
Args:
delta_text: Text to consider and parse content from.
- reasoning_content: reasoning content from current_text.
+ reasoning: reasoning content from current_text.
response_content: response content from current_text.
current_text: The full previous + delta text.
response_seq_len: Len of the complete response sequence used.
@@ -301,20 +297,20 @@ class GraniteReasoningParser(ReasoningParser):
reasoning_end_idx = len(delta_text) - (len(response_content) + response_seq_len)
if reasoning_end_idx < 0:
- delta_reasoning_content = None
+ delta_reasoning = None
else:
# Get the starting offset
- start_reasoning_content_idx = (
- len(reasoning_content) + response_seq_len + len(response_content) - 1
+ start_reasoning_idx = (
+ len(reasoning) + response_seq_len + len(response_content) - 1
)
delta_offset = len(current_text) - len(delta_text)
- start_offset = start_reasoning_content_idx - delta_offset
+ start_offset = start_reasoning_idx - delta_offset
if start_offset < 0:
start_offset = 0
- delta_reasoning_content = delta_text[start_offset:reasoning_end_idx]
+ delta_reasoning = delta_text[start_offset:reasoning_end_idx]
return DeltaMessage(
- reasoning_content=delta_reasoning_content,
+ reasoning=delta_reasoning,
content=delta_content,
)
@@ -333,7 +329,7 @@ class GraniteReasoningParser(ReasoningParser):
(if there is one) and the non-reasoning content.
"""
current_chunk_start = 0
- start_reasoning_content = None
+ start_reasoning = None
parsed_content = False
delimiter_idxs = [
idx
@@ -344,10 +340,10 @@ class GraniteReasoningParser(ReasoningParser):
for current_chunk_end in delimiter_idxs:
current_chunk = current_text[current_chunk_start:current_chunk_end]
# Check to see if the start of reasoning seq is complete
- if start_reasoning_content is None:
+ if start_reasoning is None:
for think_start in self.valid_think_starts:
if current_chunk == think_start[:-1]:
- start_reasoning_content = current_chunk_end + 1
+ start_reasoning = current_chunk_end + 1
current_chunk_start = current_chunk_end + 1
break
@@ -357,13 +353,11 @@ class GraniteReasoningParser(ReasoningParser):
if current_chunk[-len(response_start) + 1 :] == response_start[:-1]:
# Mark end of reasoning and start response content
# after the start of response sequence.
- end_reasoning_content = current_chunk_end - len(response_start)
- reasoning_content = current_text[
- start_reasoning_content:end_reasoning_content
- ]
+ end_reasoning = current_chunk_end - len(response_start)
+ reasoning = current_text[start_reasoning:end_reasoning]
response_content = current_text[current_chunk_end + 1 :]
- return reasoning_content, len(response_start), response_content
+ return reasoning, len(response_start), response_content
- if start_reasoning_content and not parsed_content:
- return current_text[start_reasoning_content:], None, None
+ if start_reasoning and not parsed_content:
+ return current_text[start_reasoning:], None, None
return None, None, None
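Unlike the token-based parsers, Granite matches plain-text marker phrases. A hedged sketch of the extraction idea; the phrases and regex here are illustrative assumptions, not the parser's real configuration:

```python
# Illustrative Granite-style extraction on assumed marker phrases.
import re

THINK_START = "Here is my thought process:"
RESPONSE_START = "Here is my response:"

pattern = re.compile(
    re.escape(THINK_START) + r"\s*(.*?)\s*" +
    re.escape(RESPONSE_START) + r"\s*(.*)",
    re.DOTALL,
)

m = pattern.match("Here is my thought process: ponder Here is my response: 42")
assert m is not None
assert m.groups() == ("ponder", "42")
```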
diff --git a/vllm/reasoning/hunyuan_a13b_reasoning_parser.py b/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
index 1a82068c2694..f297454f57ec 100644
--- a/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
+++ b/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
@@ -86,7 +86,7 @@ class HunyuanA13BReasoningParser(ReasoningParser):
# this id is not part of content, so just return [] here.
return []
- def extract_reasoning_content(
+ def extract_reasoning(
self, model_output: str, request: ChatCompletionRequest
) -> tuple[str | None, str | None]:
"""Extract the reasoning content & content sections, respectively.
@@ -104,27 +104,27 @@ class HunyuanA13BReasoningParser(ReasoningParser):
re_match = self.full_match_reasoning_regex.findall(model_output)
if re_match:
- reasoning_content, response_content = re_match[0]
- if len(reasoning_content) == 0:
- reasoning_content = None
+ reasoning, response_content = re_match[0]
+ if len(reasoning) == 0:
+ reasoning = None
if len(response_content) == 0:
response_content = None
- return reasoning_content, response_content
+ return reasoning, response_content
fallback_regex = self.half_match_reasoning_regex
fallback_match = fallback_regex.findall(model_output)
if fallback_match:
- reasoning_content, response_content = fallback_match[0]
+ reasoning, response_content = fallback_match[0]
if response_content.endswith(self.response_end_expr):
response_content = response_content[: -len(self.response_end_expr)]
- if len(reasoning_content) == 0:
- reasoning_content = None
+ if len(reasoning) == 0:
+ reasoning = None
if len(response_content) == 0:
response_content = None
- return reasoning_content, response_content
+ return reasoning, response_content
return None, model_output
@@ -140,7 +140,7 @@ class HunyuanA13BReasoningParser(ReasoningParser):
sub_idx += 1
return sub_idx == len(subsequence)
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -223,19 +223,15 @@ class HunyuanA13BReasoningParser(ReasoningParser):
# Return content based on current state
if self.current_state == "think":
- return DeltaMessage(
- reasoning_content=buffered_content, content=None
- )
+ return DeltaMessage(reasoning=buffered_content, content=None)
else:
- return DeltaMessage(
- reasoning_content=None, content=buffered_content
- )
+ return DeltaMessage(reasoning=None, content=buffered_content)
else:
# No buffered content, send normally
if self.current_state == "think":
- return DeltaMessage(reasoning_content=delta_text, content=None)
+ return DeltaMessage(reasoning=delta_text, content=None)
else:
- return DeltaMessage(reasoning_content=None, content=delta_text)
+ return DeltaMessage(reasoning=None, content=delta_text)
# If no content to send in this delta
return None
diff --git a/vllm/reasoning/identity_reasoning_parser.py b/vllm/reasoning/identity_reasoning_parser.py
index f1d17a71be33..e92f8add0391 100644
--- a/vllm/reasoning/identity_reasoning_parser.py
+++ b/vllm/reasoning/identity_reasoning_parser.py
@@ -36,7 +36,7 @@ class IdentityReasoningParser(ReasoningParser):
# Identity: return all tokens as content
return input_ids
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -50,9 +50,9 @@ class IdentityReasoningParser(ReasoningParser):
return DeltaMessage(content=delta_text)
return None
- def extract_reasoning_content(
+ def extract_reasoning(
self, model_output: str, request: ChatCompletionRequest
) -> tuple[str | None, str | None]:
- # No reasoning separation: return None for reasoning_content,
+ # No reasoning separation: return None for reasoning,
# and full model_output as content
return None, model_output
diff --git a/vllm/reasoning/minimax_m2_reasoning_parser.py b/vllm/reasoning/minimax_m2_reasoning_parser.py
index 440b2b6e2fc2..30f5f2f88caf 100644
--- a/vllm/reasoning/minimax_m2_reasoning_parser.py
+++ b/vllm/reasoning/minimax_m2_reasoning_parser.py
@@ -48,7 +48,7 @@ class MiniMaxM2AppendThinkReasoningParser(ReasoningParser):
def extract_content_ids(self, input_ids: list[int]) -> list[int]:
return input_ids
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -61,7 +61,7 @@ class MiniMaxM2AppendThinkReasoningParser(ReasoningParser):
delta_text = "" + delta_text
return DeltaMessage(content=delta_text)
- def extract_reasoning_content(
+ def extract_reasoning(
self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
) -> tuple[str | None, str | None]:
return None, "" + model_output
diff --git a/vllm/reasoning/olmo3_reasoning_parser.py b/vllm/reasoning/olmo3_reasoning_parser.py
index 91512a2e34c7..7149f8c4123b 100644
--- a/vllm/reasoning/olmo3_reasoning_parser.py
+++ b/vllm/reasoning/olmo3_reasoning_parser.py
@@ -115,7 +115,7 @@ class Olmo3ReasoningBuffer:
if end_think_idx > 0:
# this covers the case there's content before
# the end of the reasoning block
- return DeltaMessage(reasoning_content=pretext)
+ return DeltaMessage(reasoning=pretext)
if self.state == Olmo3ReasoningState.REASONING:
# we are inside reasoning block, return and empty
@@ -124,7 +124,7 @@ class Olmo3ReasoningBuffer:
text_buffer,
self.buffer,
) = self.buffer, ""
- return DeltaMessage(reasoning_content=text_buffer)
+ return DeltaMessage(reasoning=text_buffer)
if self.state == Olmo3ReasoningState.CONTENT:
# we are outside reasoning block, return and empty
@@ -250,7 +250,7 @@ class Olmo3ReasoningParser(ReasoningParser):
# this id is not part of content, so just return [] here.
return []
- def extract_reasoning_content(
+ def extract_reasoning(
self,
model_output: str,
request: ChatCompletionRequest | ResponsesRequest,
@@ -271,14 +271,14 @@ class Olmo3ReasoningParser(ReasoningParser):
re_match = self.reasoning_regex.match(model_output)
if re_match:
- reasoning_content = re_match.group("reasoning") or None
+ reasoning = re_match.group("reasoning") or None
content = re_match.group("content") or None
- return reasoning_content, content
+ return reasoning, content
# no reasoning content
return None, model_output
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
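The Olmo3 non-streaming path leans on a regex with named groups; a sketch of the same plumbing (the real vLLM regex differs):

```python
# Named-group extraction in the style of the Olmo3 block above.
import re

reasoning_regex = re.compile(
    r"<think>(?P<reasoning>.*?)</think>(?P<content>.*)", re.DOTALL
)

m = reasoning_regex.match("<think>hmm</think>ok")
assert m is not None
assert (m.group("reasoning") or None, m.group("content") or None) == ("hmm", "ok")
```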
diff --git a/vllm/reasoning/qwen3_reasoning_parser.py b/vllm/reasoning/qwen3_reasoning_parser.py
index 254f0e2e066b..ef7762bf0af5 100644
--- a/vllm/reasoning/qwen3_reasoning_parser.py
+++ b/vllm/reasoning/qwen3_reasoning_parser.py
@@ -27,7 +27,7 @@ class Qwen3ReasoningParser(BaseThinkingReasoningParser):
"""The token that ends reasoning content."""
return ""
- def extract_reasoning_content(
+ def extract_reasoning(
self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
) -> tuple[str | None, str | None]:
"""
@@ -37,7 +37,7 @@ class Qwen3ReasoningParser(BaseThinkingReasoningParser):
to be present, unlike other models that work with just the end token.
For text <think>abc</think>xyz:
- - 'abc' goes to reasoning_content
+ - 'abc' goes to reasoning
- 'xyz' goes to content
Returns:
@@ -61,7 +61,7 @@ class Qwen3ReasoningParser(BaseThinkingReasoningParser):
return None, model_output
# Extract reasoning content from the model output.
- reasoning_content, _, content = model_output.partition(self.end_token)
+ reasoning, _, content = model_output.partition(self.end_token)
final_content = content or None
- return reasoning_content, final_content
+ return reasoning, final_content
diff --git a/vllm/reasoning/step3_reasoning_parser.py b/vllm/reasoning/step3_reasoning_parser.py
index 202da057b028..f635758a92c0 100644
--- a/vllm/reasoning/step3_reasoning_parser.py
+++ b/vllm/reasoning/step3_reasoning_parser.py
@@ -40,7 +40,7 @@ class Step3ReasoningParser(ReasoningParser):
"token in the tokenizer!"
)
- def extract_reasoning_content_streaming(
+ def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -54,7 +54,7 @@ class Step3ReasoningParser(ReasoningParser):
Handles streaming output where previous + delta = current.
Uses token IDs for faster processing.
For text "abcxyz":
- - 'abc' goes to reasoning_content
+ - 'abc' goes to reasoning
- 'xyz' goes to content
"""
# Skip single special token
@@ -64,10 +64,10 @@ class Step3ReasoningParser(ReasoningParser):
if self.think_end_token_id in delta_token_ids:
# </think> in delta, extract reasoning content and remaining content
end_index = delta_text.find(self.think_end_token)
- reasoning_content = delta_text[:end_index]
+ reasoning = delta_text[:end_index]
content = delta_text[end_index + len(self.think_end_token) :]
return DeltaMessage(
- reasoning_content=reasoning_content,
+ reasoning=reasoning,
content=content if content else None,
)
elif self.think_end_token_id in previous_token_ids:
@@ -75,9 +75,9 @@ class Step3ReasoningParser(ReasoningParser):
return DeltaMessage(content=delta_text)
else:
# No </think> seen yet, everything is reasoning
- return DeltaMessage(reasoning_content=delta_text)
+ return DeltaMessage(reasoning=delta_text)
- def extract_reasoning_content(
+ def extract_reasoning(
self, model_output: str, request: ChatCompletionRequest
) -> tuple[str | None, str | None]:
# Check if the model output contains the </think> token
@@ -87,7 +87,7 @@ class Step3ReasoningParser(ReasoningParser):
else:
# Find the first occurrence of </think>
end_index = model_output.find(self.think_end_token)
- reasoning_content = model_output[:end_index]
+ reasoning = model_output[:end_index]
# Content after the </think> token
content = model_output[end_index + len(self.think_end_token) :]
@@ -95,7 +95,7 @@ class Step3ReasoningParser(ReasoningParser):
if len(content) == 0:
content = None
- return reasoning_content, content
+ return reasoning, content
def is_reasoning_end(self, input_ids: list[int]) -> bool:
return self.think_end_token_id in input_ids
diff --git a/vllm/transformers_utils/chat_templates/template_minicpmv45.jinja b/vllm/transformers_utils/chat_templates/template_minicpmv45.jinja
index 661ebd1cf5c1..c73ae96f0c1d 100644
--- a/vllm/transformers_utils/chat_templates/template_minicpmv45.jinja
+++ b/vllm/transformers_utils/chat_templates/template_minicpmv45.jinja
@@ -30,18 +30,18 @@
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{%- set content = message.content %}
- {%- set reasoning_content = '' %}
- {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
- {%- set reasoning_content = message.reasoning_content %}
+ {%- set reasoning = '' %}
+ {%- if message.reasoning is defined and message.reasoning is not none %}
+ {%- set reasoning = message.reasoning %}
{%- else %}
{%- if '</think>' in message.content %}
{%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
- {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+ {%- set reasoning = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{%- if loop.index0 > ns.last_query_index %}
- {%- if loop.last or (not loop.last and reasoning_content) %}
- {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+ {%- if loop.last or (not loop.last and reasoning) %}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
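The template's fallback split (with the `<think>` tags restored above, an assumption) has a direct Python equivalent, which also serves as a check of the restoration:

```python
# Python equivalent of the template's fallback split.
text = "preamble<think>hmm</think>\nanswer"
content = text.split("</think>")[-1].lstrip("\n")
reasoning = text.split("</think>")[0].rstrip("\n").split("<think>")[-1].lstrip("\n")
assert (reasoning, content) == ("hmm", "answer")
```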
diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py
index 34433484fc14..39198a1f3d81 100644
--- a/vllm/transformers_utils/tokenizers/mistral.py
+++ b/vllm/transformers_utils/tokenizers/mistral.py
@@ -121,8 +121,8 @@ def _prepare_apply_chat_template_tools_and_messages(
#
# [1]: https://github.com/mistralai/mistral-common/blob/f4a06998b75ed78bbf5aaf569590b772ea26c9f6/src/mistral_common/protocol/instruct/messages.py#L80
for message in messages:
- # Remove reasoning_content as unsupported by Mistral
- _ = message.pop("reasoning_content", None) # type: ignore
+ # Remove reasoning (and its deprecated alias reasoning_content) as unsupported by Mistral
+ _ = message.pop("reasoning", None) # type: ignore
+ _ = message.pop("reasoning_content", None) # type: ignore
# The Mistral client, in comparison to the OpenAI client, requires the
# "parameters" dict and the "description" string to be present