From 20fda431515d19a883cc962d3a1fa727f225e82d Mon Sep 17 00:00:00 2001 From: Robin <863579016@qq.com> Date: Wed, 17 Dec 2025 16:37:57 +0800 Subject: [PATCH] [Bugfix][Frontend] Prevent IndexError in MiniMax M2 tool parser during streaming extraction (#30555) Signed-off-by: WangErXiao <863579016@qq.com> --- tests/tool_use/test_minimax_m2_tool_parser.py | 119 ++++++++++++++++++ vllm/tool_parsers/minimax_m2_tool_parser.py | 22 +++- 2 files changed, 137 insertions(+), 4 deletions(-) create mode 100644 tests/tool_use/test_minimax_m2_tool_parser.py diff --git a/tests/tool_use/test_minimax_m2_tool_parser.py b/tests/tool_use/test_minimax_m2_tool_parser.py new file mode 100644 index 0000000000000..cf1835b1928b4 --- /dev/null +++ b/tests/tool_use/test_minimax_m2_tool_parser.py @@ -0,0 +1,119 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import json + +import pytest + +from vllm.tool_parsers.minimax_m2_tool_parser import ( + MinimaxM2ToolParser, +) + +pytestmark = pytest.mark.cpu_test + + +class FakeTokenizer: + """Minimal fake tokenizer that exposes the attributes used by the + parser: a truthy model_tokenizer marker and a vocab mapping for the + special tokens. + """ + + def __init__(self): + self.model_tokenizer = True + # The parser will look up start/end tokens by their literal strings + self.vocab = { + "": 1, + "": 2, + } + + def get_vocab(self): + return self.vocab + + +@pytest.fixture +def minimax_m2_tool_parser(): + return MinimaxM2ToolParser(FakeTokenizer()) + + +def test_extract_tool_calls_streaming_incremental(minimax_m2_tool_parser): + parser = minimax_m2_tool_parser + parser._reset_streaming_state() + chunks = [ + "", + '', + '', + "Seattle", + "", + ] + previous = "" + for chunk in chunks: + current = previous + chunk + delta = chunk + parser.extract_tool_calls_streaming( + previous_text=previous, + current_text=current, + delta_text=delta, + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[], + request=None, + ) + previous = current + + assert len(parser.prev_tool_call_arr) == 1 + entry = parser.prev_tool_call_arr[0] + + assert entry["name"] == "get_weather" + args = entry["arguments"] + assert args["city"] == "Seattle" + + +def test_streaming_minimax_m2_multiple_invokes(minimax_m2_tool_parser): + parser = minimax_m2_tool_parser + parser._reset_streaming_state() + + chunks = [ + "", + '', + '', + '["technology", "events"]', + '', + '["OpenAI", "latest", "release"]', + "", + '', + '', + '["technology", "events"]', + '', + '["Gemini", "latest", "release"]', + "", + "", + ] + previous = "" + for chunk in chunks: + current = previous + chunk + delta = chunk + parser.extract_tool_calls_streaming( + previous_text=previous, + current_text=current, + delta_text=delta, + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[], + request=None, + ) + previous = current + + assert len(parser.prev_tool_call_arr) == 2 + + for entry, expect_model in zip(parser.prev_tool_call_arr, ["OpenAI", "Gemini"]): + assert entry["name"] == "search_web" + args = json.dumps(entry["arguments"]) + assert "technology" in args and "events" in args + assert expect_model in args + + # check streamed_args_for_tool for serving_chat.py + for index in range(2): + expected_call = parser.prev_tool_call_arr[index].get("arguments", {}) + expected_call = json.dumps(expected_call) + actual_call = parser.streamed_args_for_tool[index] + assert expected_call == actual_call diff --git a/vllm/tool_parsers/minimax_m2_tool_parser.py b/vllm/tool_parsers/minimax_m2_tool_parser.py index dcb2b64f6e73c..a1ab75f548bfc 100644 --- a/vllm/tool_parsers/minimax_m2_tool_parser.py +++ b/vllm/tool_parsers/minimax_m2_tool_parser.py @@ -122,6 +122,8 @@ class MinimaxM2ToolParser(ToolParser): self.streaming_request = None # Clear previous tool call history to avoid state pollution self.prev_tool_call_arr.clear() + # Reset streamed args tracking + self.streamed_args_for_tool.clear() def _extract_name(self, name_str: str) -> str: """Extract name from quoted string.""" @@ -421,9 +423,12 @@ class MinimaxM2ToolParser(ToolParser): self.prev_tool_call_arr.append( { "name": self.current_function_name, - "arguments": "{}", # Placeholder, will be updated later + "arguments": {}, # Placeholder, will be updated later } ) + # Initialize streamed_args_for_tool for this tool call + if len(self.streamed_args_for_tool) <= self.current_tool_index: + self.streamed_args_for_tool.append("") # Send header with function info return DeltaMessage( @@ -445,6 +450,9 @@ class MinimaxM2ToolParser(ToolParser): # Send opening brace if not sent yet if self.in_function and not self.json_started: self.json_started = True + # Update streamed_args_for_tool for opening brace + if self.current_tool_index < len(self.streamed_args_for_tool): + self.streamed_args_for_tool[self.current_tool_index] += "{" return DeltaMessage( tool_calls=[ DeltaToolCall( @@ -493,7 +501,7 @@ class MinimaxM2ToolParser(ToolParser): args = parsed_tool.function.arguments self.prev_tool_call_arr[self.current_tool_index][ "arguments" - ] = args + ] = json.loads(args) except Exception: pass # Ignore parsing errors during streaming @@ -505,7 +513,9 @@ class MinimaxM2ToolParser(ToolParser): ) ] ) - + # Update streamed_args_for_tool for closing brace + if self.current_tool_index < len(self.streamed_args_for_tool): + self.streamed_args_for_tool[self.current_tool_index] += "}" # Reset state for next tool self.json_closed = True self.in_function = False @@ -630,7 +640,11 @@ class MinimaxM2ToolParser(ToolParser): ) self.param_count += 1 - + # Update streamed_args_for_tool for this tool call + if self.current_tool_index < len(self.streamed_args_for_tool): + self.streamed_args_for_tool[self.current_tool_index] += ( + json_fragment + ) return DeltaMessage( tool_calls=[ DeltaToolCall(