From 6eca337ce09e7dfa05ce57c4183ddb5d4488c85e Mon Sep 17 00:00:00 2001
From: Shintarou Okada
Date: Thu, 24 Jul 2025 18:56:36 +0900
Subject: [PATCH] Replace `--expand-tools-even-if-tool-choice-none` with
 `--exclude-tools-when-tool-choice-none` for v0.10.0 (#20544)

Signed-off-by: okada
Signed-off-by: okada shintarou
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 docs/features/tool_calling.md           | 3 ++-
 vllm/entrypoints/openai/api_server.py   | 2 ++
 vllm/entrypoints/openai/cli_args.py     | 3 +++
 vllm/entrypoints/openai/serving_chat.py | 7 ++++++-
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/docs/features/tool_calling.md b/docs/features/tool_calling.md
index ce74683a1620f..37d502ef9ce0a 100644
--- a/docs/features/tool_calling.md
+++ b/docs/features/tool_calling.md
@@ -103,7 +103,8 @@ When tool_choice='required' is set, the model is guaranteed to generate one or m
 
 vLLM supports the `tool_choice='none'` option in the chat completion API. When this option is set, the model will not generate any tool calls and will respond with regular text content only, even if tools are defined in the request.
 
-However, when `tool_choice='none'` is specified, vLLM includes tool definitions from the prompt.
+!!! note
+    When tools are specified in the request, vLLM includes tool definitions in the prompt by default, regardless of the `tool_choice` setting. To exclude tool definitions when `tool_choice='none'`, use the `--exclude-tools-when-tool-choice-none` option.
 
 ## Automatic Function Calling
 
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index d4135519aa459..89e5e7ed8d3c7 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -1646,6 +1646,8 @@ async def init_app_state(
         chat_template_content_format=args.chat_template_content_format,
         return_tokens_as_token_ids=args.return_tokens_as_token_ids,
         enable_auto_tools=args.enable_auto_tool_choice,
+        exclude_tools_when_tool_choice_none=args.
+        exclude_tools_when_tool_choice_none,
         tool_parser=args.tool_call_parser,
         reasoning_parser=args.reasoning_parser,
         enable_prompt_tokens_details=args.enable_prompt_tokens_details,
diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py
index 3025a62636827..7f60fe7130277 100644
--- a/vllm/entrypoints/openai/cli_args.py
+++ b/vllm/entrypoints/openai/cli_args.py
@@ -133,6 +133,9 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
     """If specified, API server will add X-Request-Id header to responses.
     Caution: this hurts performance at high QPS."""
     enable_auto_tool_choice: bool = False
     """Enable auto tool choice for supported models.
     Use `--tool-call-parser` to specify which parser to use."""
+    exclude_tools_when_tool_choice_none: bool = False
+    """If specified, exclude tool definitions in prompts when
+    tool_choice='none'."""
     tool_call_parser: Optional[str] = None
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 33d80743420c1..832a3d501de07 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -63,6 +63,7 @@ class OpenAIServingChat(OpenAIServing):
         return_tokens_as_token_ids: bool = False,
         reasoning_parser: str = "",
         enable_auto_tools: bool = False,
+        exclude_tools_when_tool_choice_none: bool = False,
         tool_parser: Optional[str] = None,
         enable_prompt_tokens_details: bool = False,
         enable_force_include_usage: bool = False,
@@ -111,6 +112,8 @@ class OpenAIServingChat(OpenAIServing):
                 raise TypeError("Error: --enable-auto-tool-choice requires "
                                 f"tool_parser:'{tool_parser}' which has not "
                                 "been registered") from e
 
+        self.exclude_tools_when_tool_choice_none = (
+            exclude_tools_when_tool_choice_none)
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
         self.enable_force_include_usage = enable_force_include_usage
@@ -174,7 +177,9 @@ class OpenAIServingChat(OpenAIServing):
                 "--enable-auto-tool-choice and --tool-call-parser to be set"
             )
 
-            if request.tools is None:
+            if (request.tools is None
+                    or (request.tool_choice == "none"
+                        and self.exclude_tools_when_tool_choice_none)):
                 tool_dicts = None
             else:
                 tool_dicts = [tool.model_dump() for tool in request.tools]
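
---

Usage sketch for the new flag (not guaranteed against any particular vLLM release; the model name, parser name, and `get_weather` tool below are placeholders for illustration): with a server started as

    vllm serve meta-llama/Llama-3.1-8B-Instruct \
        --enable-auto-tool-choice --tool-call-parser llama3_json \
        --exclude-tools-when-tool-choice-none

a request that defines tools but sets `tool_choice="none"` should get a plain-text answer, and the flag keeps the tool schemas out of the rendered prompt:

    from openai import OpenAI

    # Point the standard OpenAI client at the local vLLM server.
    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

    # A hypothetical tool definition, included only to show that it is
    # excluded from the prompt when tool_choice="none" and the flag is set.
    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }]

    # With tool_choice="none", the model never emits tool calls; with the
    # server flag set, the tool definitions also consume no prompt tokens.
    resp = client.chat.completions.create(
        model="meta-llama/Llama-3.1-8B-Instruct",
        messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
        tools=tools,
        tool_choice="none",
    )
    print(resp.choices[0].message.content)

Without the flag, the same request still returns plain text, but the tool definitions are rendered into the prompt, which can matter for context budget and prompt caching.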