From 3c7fefdeba183e5c5e575f668b797549530f5a3d Mon Sep 17 00:00:00 2001
From: Alec S <10566873+alecsolder@users.noreply.github.com>
Date: Wed, 29 Oct 2025 05:42:44 -0400
Subject: [PATCH] [Frontend] [gpt-oss] Tool json call parsing error retry
 (#27675)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
Signed-off-by: Alec Solder <alecs@fb.com>
Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com>
Co-authored-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
Co-authored-by: Alec Solder <alecs@fb.com>
Co-authored-by: Ye (Charlotte) Qi <yeq@meta.com>
---
 vllm/entrypoints/context.py       | 39 +++++++++++++++++++++++++++++--
 vllm/entrypoints/harmony_utils.py | 19 ++++++++++++++-
 vllm/envs.py                      |  7 ++++++
 3 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py
index 8886d7c42d8a..0041db822080 100644
--- a/vllm/entrypoints/context.py
+++ b/vllm/entrypoints/context.py
@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Union
 from openai.types.responses.tool import Mcp
 from openai_harmony import Author, Message, Role, StreamState, TextContent
 
+from vllm import envs
 from vllm.entrypoints.harmony_utils import (
     get_encoding,
     get_streamable_parser_for_assistant,
@@ -109,6 +110,28 @@ class ConversationContext(ABC):
         raise NotImplementedError("Should not be called.")
 
 
+def _create_json_parse_error_messages(
+    last_msg: Message, e: json.JSONDecodeError
+) -> list[Message]:
+    """
+    Creates an error message when json parse failed.
+    """
+    error_msg = (
+        f"Error parsing tool arguments as JSON: {str(e)}. "
+        "Please ensure the tool call arguments are valid JSON and try again."
+    )
+    content = TextContent(text=error_msg)
+    author = Author(role=Role.TOOL, name=last_msg.recipient)
+    return [
+        Message(
+            author=author,
+            content=[content],
+            recipient=Role.ASSISTANT,
+            channel=last_msg.channel,
+        )
+    ]
+
+
 class SimpleContext(ConversationContext):
     def __init__(self):
         self.last_output = None
@@ -339,7 +362,13 @@ class HarmonyContext(ConversationContext):
         if isinstance(tool_session, Tool):
             return await tool_session.get_result(self)
         tool_name = last_msg.recipient.split(".")[1]
-        args = json.loads(last_msg.content[0].text)
+        if envs.VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY:
+            try:
+                args = json.loads(last_msg.content[0].text)
+            except json.JSONDecodeError as e:
+                return _create_json_parse_error_messages(last_msg, e)
+        else:
+            args = json.loads(last_msg.content[0].text)
         result = await tool_session.call_tool(tool_name, args)
         result_str = result.content[0].text
         content = TextContent(text=result_str)
@@ -420,7 +449,13 @@ class HarmonyContext(ConversationContext):
         if isinstance(tool_session, Tool):
             return await tool_session.get_result(self)
         tool_name = last_msg.recipient.split(".")[1].split(" ")[0]
-        args = json.loads(last_msg.content[0].text)
+        if envs.VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY:
+            try:
+                args = json.loads(last_msg.content[0].text)
+            except json.JSONDecodeError as e:
+                return _create_json_parse_error_messages(last_msg, e)
+        else:
+            args = json.loads(last_msg.content[0].text)
         result = await tool_session.call_tool(tool_name, args)
         result_str = result.content[0].text
         content = TextContent(text=result_str)
diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py
index 97f95a97ee30..8888a5aeb6b1 100644
--- a/vllm/entrypoints/harmony_utils.py
+++ b/vllm/entrypoints/harmony_utils.py
@@ -340,7 +340,24 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
         if len(message.content) != 1:
             raise ValueError("Invalid number of contents in browser message")
         content = message.content[0]
-        browser_call = json.loads(content.text)
+        # We do not need to check the VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY
+        # env variable since if it is not set, we are certain the json is valid
+        # The use of Actions for web search will be removed entirely in
+        # the future, so this is only necessary temporarily
+        try:
+            browser_call = json.loads(content.text)
+        except json.JSONDecodeError:
+            # If the content is not valid JSON, then it was
+            # caught and retried by vLLM, which means we
+            # need to make note of that so the user is aware
+            json_retry_output_message = (
+                f"Invalid JSON args, caught and retried: {content.text}"
+            )
+            browser_call = {
+                "query": json_retry_output_message,
+                "url": json_retry_output_message,
+                "pattern": json_retry_output_message,
+            }
         # TODO: translate to url properly!
         if recipient == "browser.search":
             action = ActionSearch(
diff --git a/vllm/envs.py b/vllm/envs.py
index 018af0e5bba8..0786d5d9ddcb 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -199,6 +199,7 @@ if TYPE_CHECKING:
     VLLM_ALLREDUCE_USE_SYMM_MEM: bool = False
     VLLM_TUNED_CONFIG_FOLDER: str | None = None
     VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
+    VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY: bool = False
     VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
     VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False
     VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True
@@ -1331,6 +1332,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS": lambda: bool(
         int(os.getenv("VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS", "0"))
     ),
+    # Enable automatic retry when tool call JSON parsing fails
+    # If enabled, returns an error message to the model to retry
+    # If disabled (default), raises an exception and fails the request
+    "VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY": lambda: bool(
+        int(os.getenv("VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY", "0"))
+    ),
     # Add optional custom scopes for profiling, disable to avoid overheads
     "VLLM_CUSTOM_SCOPES_FOR_PROFILING": lambda: bool(
         int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0"))