mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-26 20:14:43 +08:00
[Bugfix] fix tool_parser error handling when serving a model that does not support it (#8709)
This commit is contained in:
parent
b22b798471
commit
fdf59d30ea
@ -302,10 +302,6 @@ class OpenAIServingChat(OpenAIServing):
|
||||
finish_reason_sent = [False] * num_choices
|
||||
num_prompt_tokens = 0
|
||||
|
||||
tool_parsers: List[Optional[ToolParser]] = [
|
||||
self.tool_parser(tokenizer) if self.tool_parser else None
|
||||
] * num_choices
|
||||
|
||||
if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
|
||||
tool_choice_function_name = request.tool_choice.function.name
|
||||
else:
|
||||
@ -324,6 +320,21 @@ class OpenAIServingChat(OpenAIServing):
|
||||
else:
|
||||
previous_texts, all_previous_token_ids = None, None
|
||||
|
||||
# Prepare the tool parser if it's needed
|
||||
try:
|
||||
if tool_choice_auto and self.tool_parser:
|
||||
tool_parsers: List[Optional[ToolParser]] = [
|
||||
self.tool_parser(tokenizer)
|
||||
] * num_choices
|
||||
else:
|
||||
tool_parsers = [None] * num_choices
|
||||
except RuntimeError as e:
|
||||
logger.error("Error in tool parser creation: %s", e)
|
||||
data = self.create_streaming_error_response(str(e))
|
||||
yield f"data: {data}\n\n"
|
||||
yield "data: [DONE]\n\n"
|
||||
return
|
||||
|
||||
try:
|
||||
async for res in result_generator:
|
||||
if res.prompt_token_ids is not None:
|
||||
@ -704,7 +715,12 @@ class OpenAIServingChat(OpenAIServing):
|
||||
or request.tool_choice is None) and self.enable_auto_tools \
|
||||
and self.tool_parser:
|
||||
|
||||
tool_parser = self.tool_parser(tokenizer)
|
||||
try:
|
||||
tool_parser = self.tool_parser(tokenizer)
|
||||
except RuntimeError as e:
|
||||
logger.error("Error in tool parser creation: %s", e)
|
||||
return self.create_error_response(str(e))
|
||||
|
||||
tool_call_info = tool_parser.extract_tool_calls(
|
||||
output.text, request=request)
|
||||
tools_called = tool_call_info.tools_called
|
||||
|
||||
@ -50,10 +50,10 @@ class Hermes2ProToolParser(ToolParser):
|
||||
raise ValueError(
|
||||
"The model tokenizer must be passed to the ToolParser "
|
||||
"constructor during construction.")
|
||||
self.tool_call_start_token_id: int = self.model_tokenizer.vocab[
|
||||
self.tool_call_start_token]
|
||||
self.tool_call_end_token_id: int = self.model_tokenizer.vocab[
|
||||
self.tool_call_end_token]
|
||||
self.tool_call_start_token_id: int = self.model_tokenizer.vocab.get(
|
||||
self.tool_call_start_token, None)
|
||||
self.tool_call_end_token_id: int = self.model_tokenizer.vocab.get(
|
||||
self.tool_call_end_token, None)
|
||||
if not self.tool_call_start_token_id or not self.tool_call_end_token_id:
|
||||
raise RuntimeError(
|
||||
"Hermes 2 Pro Tool parser could not locate tool call start/end "
|
||||
|
||||
@ -61,8 +61,13 @@ class MistralToolParser(ToolParser):
|
||||
self.streamed_args_for_tool: List[str] = [
|
||||
] # map what has been streamed for each tool so far to a list
|
||||
self.bot_token = "[TOOL_CALLS]"
|
||||
self.bot_token_id = self.model_tokenizer.get_vocab()[self.bot_token]
|
||||
self.bot_token_id = self.model_tokenizer.get_vocab().get(
|
||||
self.bot_token, None)
|
||||
self.tool_call_regex = re.compile(r"\[{.*?}\]", re.DOTALL)
|
||||
if not self.bot_token_id:
|
||||
raise RuntimeError(
|
||||
"Mistral Tool Parser could not locate the tool call token in "
|
||||
"the tokenizer!")
|
||||
|
||||
def extract_tool_calls(
|
||||
self,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user