From 6299628d326f429eba78736acb44e76749b281f5 Mon Sep 17 00:00:00 2001
From: "Rei." <56646027+JaviS-Rei@users.noreply.github.com>
Date: Thu, 11 Dec 2025 17:05:08 +0800
Subject: [PATCH] [bugfix] fix MiniMaxM2ReasoningParser streaming output not
separating reasoning_content. (#29882)
Signed-off-by: Rei <1477174254@qq.com>
---
 ...test_minimax_m2_append_reasoning_parser.py | 198 +++++++++++++++
 .../test_minimax_m2_reasoning_parser.py       | 233 ++++++++++++++++++
 vllm/reasoning/minimax_m2_reasoning_parser.py |  51 +++++
 3 files changed, 482 insertions(+)
create mode 100644 tests/reasoning/test_minimax_m2_append_reasoning_parser.py
create mode 100644 tests/reasoning/test_minimax_m2_reasoning_parser.py
diff --git a/tests/reasoning/test_minimax_m2_append_reasoning_parser.py b/tests/reasoning/test_minimax_m2_append_reasoning_parser.py
new file mode 100644
index 0000000000000..eefe5e3eff74c
--- /dev/null
+++ b/tests/reasoning/test_minimax_m2_append_reasoning_parser.py
@@ -0,0 +1,198 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+from transformers import AutoTokenizer
+
+from tests.reasoning.utils import run_reasoning_extraction
+from vllm.reasoning import ReasoningParser, ReasoningParserManager
+
+parser_name = "minimax_m2_append_think"
+end_token = "</think>"
+
+# MiniMax M2 model path
+REASONING_MODEL_NAME = "MiniMaxAI/MiniMax-M2"
+
+
+@pytest.fixture(scope="module")
+def minimax_m2_tokenizer():
+    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
+
+
+# =============================================================================
+# MiniMaxM2AppendThinkReasoningParser behavior:
+# - Prepends <think> to the beginning of the output
+# - Does NOT separate reasoning and content
+# - Returns everything as content (with <think> prepended)
+# - reasoning is always None
+#
+# This parser is used when you want to keep the raw output with <think> added
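+#
+# Illustrative example (hypothetical strings, mirroring SIMPLE_OUTPUT below):
+#   "abc</think>xyz" -> reasoning=None, content="<think>abc</think>xyz"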
+# =============================================================================
+
+# Case: simple output with </think> end token
+SIMPLE_OUTPUT = {
+    "output": "This is reasoning</think>This is response",
+    "reasoning": None,
+    "content": "<think>This is reasoning</think>This is response",
+    "is_reasoning_end": True,
+}
+
+# Case: output without </think> end token (reasoning in progress)
+NO_END_TOKEN = {
+    "output": "This is reasoning in progress",
+    "reasoning": None,
+    "content": "<think>This is reasoning in progress",
+    "is_reasoning_end": False,
+}
+
+# Case: only the </think> end token
+ONLY_END_TOKEN = {
+    "output": "</think>This is response",
+    "reasoning": None,
+    "content": "<think></think>This is response",
+    "is_reasoning_end": True,
+}
+
+# Case: multiple lines
+MULTIPLE_LINES = {
+    "output": "Line 1\nLine 2</think>Response 1\nResponse 2",
+    "reasoning": None,
+    "content": "<think>Line 1\nLine 2</think>Response 1\nResponse 2",
+    "is_reasoning_end": True,
+}
+
+# Case: empty output (non-streaming prepends <think>)
+EMPTY = {
+    "output": "",
+    "reasoning": None,
+    "content": "<think>",
+    "is_reasoning_end": False,
+}
+
+# Case: empty output streaming (no tokens = no output)
+EMPTY_STREAMING = {
+    "output": "",
+    "reasoning": None,
+    "content": None,
+    "is_reasoning_end": False,
+}
+
+# Case: special characters
+SPECIAL_CHARS = {
+    "output": "Let me think... 1+1=2</think>Yes!",
+    "reasoning": None,
+    "content": "<think>Let me think... 1+1=2</think>Yes!",
+    "is_reasoning_end": True,
+}
+
+# Case: code in output
+CODE_OUTPUT = {
+    "output": "```python\nprint('hi')\n```</think>Here's the code.",
+    "reasoning": None,
+    "content": "<think>```python\nprint('hi')\n```</think>Here's the code.",
+    "is_reasoning_end": True,
+}
+
+TEST_CASES = [
+    pytest.param(
+        False,
+        SIMPLE_OUTPUT,
+        id="simple_output",
+    ),
+    pytest.param(
+        True,
+        SIMPLE_OUTPUT,
+        id="simple_output_streaming",
+    ),
+    pytest.param(
+        False,
+        NO_END_TOKEN,
+        id="no_end_token",
+    ),
+    pytest.param(
+        True,
+        NO_END_TOKEN,
+        id="no_end_token_streaming",
+    ),
+    pytest.param(
+        False,
+        ONLY_END_TOKEN,
+        id="only_end_token",
+    ),
+    pytest.param(
+        True,
+        ONLY_END_TOKEN,
+        id="only_end_token_streaming",
+    ),
+    pytest.param(
+        False,
+        MULTIPLE_LINES,
+        id="multiple_lines",
+    ),
+    pytest.param(
+        True,
+        MULTIPLE_LINES,
+        id="multiple_lines_streaming",
+    ),
+    pytest.param(
+        False,
+        EMPTY,
+        id="empty",
+    ),
+    pytest.param(
+        True,
+        EMPTY_STREAMING,
+        id="empty_streaming",
+    ),
+    pytest.param(
+        False,
+        SPECIAL_CHARS,
+        id="special_chars",
+    ),
+    pytest.param(
+        True,
+        SPECIAL_CHARS,
+        id="special_chars_streaming",
+    ),
+    pytest.param(
+        False,
+        CODE_OUTPUT,
+        id="code_output",
+    ),
+    pytest.param(
+        True,
+        CODE_OUTPUT,
+        id="code_output_streaming",
+    ),
+]
+
+
+@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
+def test_reasoning(
+    streaming: bool,
+    param_dict: dict,
+    minimax_m2_tokenizer,
+):
+    output = minimax_m2_tokenizer.tokenize(param_dict["output"])
+    # convert each token back to its decoded string
+    output_tokens: list[str] = [
+        minimax_m2_tokenizer.convert_tokens_to_string([token]) for token in output
+    ]
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        minimax_m2_tokenizer
+    )
+
+    reasoning, content = run_reasoning_extraction(
+        parser, output_tokens, streaming=streaming
+    )
+
+    assert reasoning == param_dict["reasoning"]
+    assert content == param_dict["content"]
+
+    # Test is_reasoning_end
+    output_ids = minimax_m2_tokenizer.convert_tokens_to_ids(output)
+    is_reasoning_end = parser.is_reasoning_end(output_ids)
+    assert is_reasoning_end == param_dict["is_reasoning_end"]
diff --git a/tests/reasoning/test_minimax_m2_reasoning_parser.py b/tests/reasoning/test_minimax_m2_reasoning_parser.py
new file mode 100644
index 0000000000000..0d1056894c6ae
--- /dev/null
+++ b/tests/reasoning/test_minimax_m2_reasoning_parser.py
@@ -0,0 +1,233 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+from transformers import AutoTokenizer
+
+from tests.reasoning.utils import run_reasoning_extraction
+from vllm.reasoning import ReasoningParser, ReasoningParserManager
+
+parser_name = "minimax_m2"
+end_token = "</think>"
+
+# MiniMax M2 model path
+REASONING_MODEL_NAME = "MiniMaxAI/MiniMax-M2"
+
+
+@pytest.fixture(scope="module")
+def minimax_m2_tokenizer():
+    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
+
+
+# =============================================================================
+# MiniMax M2 specific behavior:
+# - Model does NOT generate the <think> start token
+# - Model only generates the </think> end token
+# - All content before </think> is reasoning
+# - All content after </think> is the actual response (content)
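+#
+# Illustrative example (hypothetical strings, mirroring SIMPLE_REASONING below):
+#   "abc</think>xyz" -> reasoning="abc", content="xyz"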
+# =============================================================================
+
+# Case: reasoning + </think> + content (typical case)
+SIMPLE_REASONING = {
+    "output": "This is a reasoning section</think>This is the rest",
+    "reasoning": "This is a reasoning section",
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+}
+
+# Case: reasoning + </think> only (no content after)
+COMPLETE_REASONING = {
+    "output": "This is a reasoning section</think>",
+    "reasoning": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": True,
+}
+
+# Case: no end token yet (streaming in progress, all is reasoning)
+NO_END_TOKEN = {
+    "output": "This is reasoning in progress",
+    "reasoning": "This is reasoning in progress",
+    "content": None,
+    "is_reasoning_end": False,
+}
+
+# Case: multiple lines of reasoning
+MULTIPLE_LINES = {
+    "output": "First line\nSecond line</think>Response first line\nResponse second",
+    "reasoning": "First line\nSecond line",
+    "content": "Response first line\nResponse second",
+    "is_reasoning_end": True,
+}
+
+# Case: only the </think> token (empty reasoning, immediate response)
+SHORTEST_REASONING_NO_STREAMING = {
+    "output": "</think>This is the response",
+    "reasoning": "",
+    "content": "This is the response",
+    "is_reasoning_end": True,
+}
+
+# Case: only end token streaming (reasoning None: the delta is just </think>)
+SHORTEST_REASONING_STREAMING = {
+    "output": "</think>This is the response",
+    "reasoning": None,
+    "content": "This is the response",
+    "is_reasoning_end": True,
+}
+
+# Case: empty output
+EMPTY = {
+    "output": "",
+    "reasoning": "",
+    "content": None,
+    "is_reasoning_end": False,
+}
+
+# Case: empty streaming
+EMPTY_STREAMING = {
+    "output": "",
+    "reasoning": None,
+    "content": None,
+    "is_reasoning_end": False,
+}
+
+# Case: long reasoning with special characters
+SPECIAL_CHARS = {
+    "output": "Let me think... 1+1=2, right?</think>Yes, 1+1=2.",
+    "reasoning": "Let me think... 1+1=2, right?",
+    "content": "Yes, 1+1=2.",
+    "is_reasoning_end": True,
+}
+
+# Case: reasoning with code blocks
+CODE_IN_REASONING = {
+    "output": "```python\nprint('hello')\n```</think>Here is the code.",
+    "reasoning": "```python\nprint('hello')\n```",
+    "content": "Here is the code.",
+    "is_reasoning_end": True,
+}
+
+TEST_CASES = [
+    # Core cases: no <think> start token (MiniMax M2 actual behavior)
+    pytest.param(
+        False,
+        SIMPLE_REASONING,
+        id="simple_reasoning",
+    ),
+    pytest.param(
+        True,
+        SIMPLE_REASONING,
+        id="simple_reasoning_streaming",
+    ),
+    pytest.param(
+        False,
+        COMPLETE_REASONING,
+        id="complete_reasoning",
+    ),
+    pytest.param(
+        True,
+        COMPLETE_REASONING,
+        id="complete_reasoning_streaming",
+    ),
+    pytest.param(
+        False,
+        NO_END_TOKEN,
+        id="no_end_token",
+    ),
+    pytest.param(
+        True,
+        NO_END_TOKEN,
+        id="no_end_token_streaming",
+    ),
+    pytest.param(
+        False,
+        MULTIPLE_LINES,
+        id="multiple_lines",
+    ),
+    pytest.param(
+        True,
+        MULTIPLE_LINES,
+        id="multiple_lines_streaming",
+    ),
+    pytest.param(
+        False,
+        SHORTEST_REASONING_NO_STREAMING,
+        id="shortest_reasoning",
+    ),
+    pytest.param(
+        True,
+        SHORTEST_REASONING_STREAMING,
+        id="shortest_reasoning_streaming",
+    ),
+    pytest.param(
+        False,
+        EMPTY,
+        id="empty",
+    ),
+    pytest.param(
+        True,
+        EMPTY_STREAMING,
+        id="empty_streaming",
+    ),
+    pytest.param(
+        False,
+        SPECIAL_CHARS,
+        id="special_chars",
+    ),
+    pytest.param(
+        True,
+        SPECIAL_CHARS,
+        id="special_chars_streaming",
+    ),
+    pytest.param(
+        False,
+        CODE_IN_REASONING,
+        id="code_in_reasoning",
+    ),
+    pytest.param(
+        True,
+        CODE_IN_REASONING,
+        id="code_in_reasoning_streaming",
+    ),
+]
+
+
+@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
+def test_reasoning(
+    streaming: bool,
+    param_dict: dict,
+    minimax_m2_tokenizer,
+):
+    output = minimax_m2_tokenizer.tokenize(param_dict["output"])
+    # convert each token back to its decoded string
+    output_tokens: list[str] = [
+        minimax_m2_tokenizer.convert_tokens_to_string([token]) for token in output
+    ]
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        minimax_m2_tokenizer
+    )
+
+    reasoning, content = run_reasoning_extraction(
+        parser, output_tokens, streaming=streaming
+    )
+
+    assert reasoning == param_dict["reasoning"]
+    assert content == param_dict["content"]
+
+    # Test is_reasoning_end
+    output_ids = minimax_m2_tokenizer.convert_tokens_to_ids(output)
+    is_reasoning_end = parser.is_reasoning_end(output_ids)
+    assert is_reasoning_end == param_dict["is_reasoning_end"]
+
+    # Test extract_content_ids
+    if param_dict["content"] is not None:
+        content = parser.extract_content_ids(output_ids)
+        assert content == minimax_m2_tokenizer.convert_tokens_to_ids(
+            minimax_m2_tokenizer.tokenize(param_dict["content"])
+        )
+    else:
+        content = parser.extract_content_ids(output_ids)
+        assert content == []
diff --git a/vllm/reasoning/minimax_m2_reasoning_parser.py b/vllm/reasoning/minimax_m2_reasoning_parser.py
index 138d1b4e6dacf..a2b9224cb3bff 100644
--- a/vllm/reasoning/minimax_m2_reasoning_parser.py
+++ b/vllm/reasoning/minimax_m2_reasoning_parser.py
@@ -19,6 +19,13 @@ logger = init_logger(__name__)
 class MiniMaxM2ReasoningParser(BaseThinkingReasoningParser):
     """
     Reasoning parser for MiniMax M2 model.
+
+    MiniMax M2 models don't generate the <think> start token, only the
+    </think> end token. All content before </think> is reasoning, and
+    content after it is the actual response.
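+
+    For example, a completion "2+2=4</think>The answer is 4." yields
+    reasoning "2+2=4" and content "The answer is 4." (illustrative strings).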
"""
@property
@@ -31,6 +38,50 @@ class MiniMaxM2ReasoningParser(BaseThinkingReasoningParser):
"""The token that ends reasoning content."""
return ""
+    def extract_reasoning_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+    ) -> DeltaMessage | None:
+        """
+        Extract reasoning content from a delta message for streaming.
+
+        MiniMax M2 models don't generate the <think> start token, so all
+        content is treated as reasoning until the </think> end token appears.
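+
+        Example of successive deltas (illustrative values):
+            "Let me think"    -> DeltaMessage(reasoning="Let me think")
+            "</think>The sum" -> DeltaMessage(content="The sum")
+            " is 4."          -> DeltaMessage(content=" is 4.")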
+ """
+ # Skip single end token
+ if len(delta_token_ids) == 1 and delta_token_ids[0] == self.end_token_id:
+ return None
+
+ # Check if end token has already appeared in previous tokens
+ # meaning we're past the reasoning phase
+ if self.end_token_id in previous_token_ids:
+ # We're past the reasoning phase, this is content
+ return DeltaMessage(content=delta_text)
+
+ # Check if end token is in delta tokens
+ if self.end_token_id in delta_token_ids:
+ # End token in delta, split reasoning and content
+ end_index = delta_text.find(self.end_token)
+ reasoning = delta_text[:end_index]
+ content = delta_text[end_index + len(self.end_token) :]
+ return DeltaMessage(
+ reasoning=reasoning if reasoning else None,
+ content=content if content else None,
+ )
+
+ # No end token yet, all content is reasoning
+ return DeltaMessage(reasoning=delta_text)
+
 
 class MiniMaxM2AppendThinkReasoningParser(ReasoningParser):
     """