From 3c7fefdeba183e5c5e575f668b797549530f5a3d Mon Sep 17 00:00:00 2001 From: Alec S <10566873+alecsolder@users.noreply.github.com> Date: Wed, 29 Oct 2025 05:42:44 -0400 Subject: [PATCH] [Frontend] [gpt-oss] Tool json call parsing error retry (#27675) Signed-off-by: Jialin Ouyang Signed-off-by: Alec Solder Signed-off-by: Ye (Charlotte) Qi Co-authored-by: Jialin Ouyang Co-authored-by: Alec Solder Co-authored-by: Ye (Charlotte) Qi --- vllm/entrypoints/context.py | 39 +++++++++++++++++++++++++++++-- vllm/entrypoints/harmony_utils.py | 19 ++++++++++++++- vllm/envs.py | 7 ++++++ 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py index 8886d7c42d8a..0041db822080 100644 --- a/vllm/entrypoints/context.py +++ b/vllm/entrypoints/context.py @@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Union from openai.types.responses.tool import Mcp from openai_harmony import Author, Message, Role, StreamState, TextContent +from vllm import envs from vllm.entrypoints.harmony_utils import ( get_encoding, get_streamable_parser_for_assistant, @@ -109,6 +110,28 @@ class ConversationContext(ABC): raise NotImplementedError("Should not be called.") +def _create_json_parse_error_messages( + last_msg: Message, e: json.JSONDecodeError +) -> list[Message]: + """ + Creates an error message when json parse failed. + """ + error_msg = ( + f"Error parsing tool arguments as JSON: {str(e)}. " + "Please ensure the tool call arguments are valid JSON and try again." + ) + content = TextContent(text=error_msg) + author = Author(role=Role.TOOL, name=last_msg.recipient) + return [ + Message( + author=author, + content=[content], + recipient=Role.ASSISTANT, + channel=last_msg.channel, + ) + ] + + class SimpleContext(ConversationContext): def __init__(self): self.last_output = None @@ -339,7 +362,13 @@ class HarmonyContext(ConversationContext): if isinstance(tool_session, Tool): return await tool_session.get_result(self) tool_name = last_msg.recipient.split(".")[1] - args = json.loads(last_msg.content[0].text) + if envs.VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY: + try: + args = json.loads(last_msg.content[0].text) + except json.JSONDecodeError as e: + return _create_json_parse_error_messages(last_msg, e) + else: + args = json.loads(last_msg.content[0].text) result = await tool_session.call_tool(tool_name, args) result_str = result.content[0].text content = TextContent(text=result_str) @@ -420,7 +449,13 @@ class HarmonyContext(ConversationContext): if isinstance(tool_session, Tool): return await tool_session.get_result(self) tool_name = last_msg.recipient.split(".")[1].split(" ")[0] - args = json.loads(last_msg.content[0].text) + if envs.VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY: + try: + args = json.loads(last_msg.content[0].text) + except json.JSONDecodeError as e: + return _create_json_parse_error_messages(last_msg, e) + else: + args = json.loads(last_msg.content[0].text) result = await tool_session.call_tool(tool_name, args) result_str = result.content[0].text content = TextContent(text=result_str) diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py index 97f95a97ee30..8888a5aeb6b1 100644 --- a/vllm/entrypoints/harmony_utils.py +++ b/vllm/entrypoints/harmony_utils.py @@ -340,7 +340,24 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: if len(message.content) != 1: raise ValueError("Invalid number of contents in browser message") content = message.content[0] - browser_call = json.loads(content.text) + # We do not need to check the VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY + # env variable since if it is not set, we are certain the json is valid + # The use of Actions for web search will be removed entirely in + # the future, so this is only necessary temporarily + try: + browser_call = json.loads(content.text) + except json.JSONDecodeError: + # If the content is not valid JSON, then it was + # caught and retried by vLLM, which means we + # need to make note of that so the user is aware + json_retry_output_message = ( + f"Invalid JSON args, caught and retried: {content.text}" + ) + browser_call = { + "query": json_retry_output_message, + "url": json_retry_output_message, + "pattern": json_retry_output_message, + } # TODO: translate to url properly! if recipient == "browser.search": action = ActionSearch( diff --git a/vllm/envs.py b/vllm/envs.py index 018af0e5bba8..0786d5d9ddcb 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -199,6 +199,7 @@ if TYPE_CHECKING: VLLM_ALLREDUCE_USE_SYMM_MEM: bool = False VLLM_TUNED_CONFIG_FOLDER: str | None = None VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False + VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY: bool = False VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True @@ -1331,6 +1332,12 @@ environment_variables: dict[str, Callable[[], Any]] = { "VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS": lambda: bool( int(os.getenv("VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS", "0")) ), + # Enable automatic retry when tool call JSON parsing fails + # If enabled, returns an error message to the model to retry + # If disabled (default), raises an exception and fails the request + "VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY": lambda: bool( + int(os.getenv("VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY", "0")) + ), # Add optional custom scopes for profiling, disable to avoid overheads "VLLM_CUSTOM_SCOPES_FOR_PROFILING": lambda: bool( int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0"))