[frontend] Refactor CLI Args for a better modular integration (#20206)
Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com>
This commit is contained in:
parent 235bfd5dfe
commit f148c44c6a
@@ -166,7 +166,7 @@ repos:
     language: python
     types: [python]
     pass_filenames: true
-    files: vllm/config.py|tests/test_config.py
+    files: vllm/config.py|tests/test_config.py|vllm/entrypoints/openai/cli_args.py
   # Keep `suggestion` last
   - id: suggestion
     name: Suggestion
@@ -10,9 +10,13 @@ import argparse
 import json
 import ssl
 from collections.abc import Sequence
-from typing import Optional, Union, get_args
+from dataclasses import field
+from typing import Literal, Optional, Union
+
+from pydantic.dataclasses import dataclass

 import vllm.envs as envs
+from vllm.config import config
 from vllm.engine.arg_utils import AsyncEngineArgs, optional_type
 from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
                                          validate_chat_template)
@@ -82,220 +86,171 @@ class PromptAdapterParserAction(argparse.Action):
        setattr(namespace, self.dest, adapter_list)

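# Illustrative sketch, with a hypothetical adapter name and path: how an
# argparse.Action such as PromptAdapterParserAction consumes repeated
# "name=path" pairs from the command line.
#
#     parser = argparse.ArgumentParser()
#     parser.add_argument("--prompt-adapters",
#                         nargs="+",
#                         action=PromptAdapterParserAction)
#     args = parser.parse_args(
#         ["--prompt-adapters", "summarizer=/adapters/summarizer"])
#     # args.prompt_adapters now holds the parsed adapter entries.
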
@config
@dataclass
class FrontendArgs:
    """Arguments for the OpenAI-compatible frontend server."""
    host: Optional[str] = None
    """Host name."""
    port: int = 8000
    """Port number."""
    uvicorn_log_level: Literal["debug", "info", "warning", "error", "critical",
                               "trace"] = "info"
    """Log level for uvicorn."""
    disable_uvicorn_access_log: bool = False
    """Disable uvicorn access log."""
    allow_credentials: bool = False
    """Allow credentials."""
    allowed_origins: list[str] = field(default_factory=lambda: ["*"])
    """Allowed origins."""
    allowed_methods: list[str] = field(default_factory=lambda: ["*"])
    """Allowed methods."""
    allowed_headers: list[str] = field(default_factory=lambda: ["*"])
    """Allowed headers."""
    api_key: Optional[str] = None
    """If provided, the server will require this key to be presented in the
    header."""
    lora_modules: Optional[list[LoRAModulePath]] = None
    """LoRA module configurations in either 'name=path' format, JSON format,
    or JSON list format. Example (old format): `'name=path'` Example (new
    format): `{\"name\": \"name\", \"path\": \"lora_path\",
    \"base_model_name\": \"id\"}`"""
    prompt_adapters: Optional[list[PromptAdapterPath]] = None
    """Prompt adapter configurations in the format name=path. Multiple adapters
    can be specified."""
    chat_template: Optional[str] = None
    """The file path to the chat template, or the template in single-line form
    for the specified model."""
    chat_template_content_format: ChatTemplateContentFormatOption = "auto"
    """The format to render message content within a chat template.

    * "string" will render the content as a string. Example: `"Hello World"`
    * "openai" will render the content as a list of dictionaries, similar to
    the OpenAI schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
    response_role: str = "assistant"
    """The role name to return if `request.add_generation_prompt=true`."""
    ssl_keyfile: Optional[str] = None
    """The file path to the SSL key file."""
    ssl_certfile: Optional[str] = None
    """The file path to the SSL cert file."""
    ssl_ca_certs: Optional[str] = None
    """The CA certificates file."""
    enable_ssl_refresh: bool = False
    """Refresh SSL Context when SSL certificate files change."""
    ssl_cert_reqs: int = int(ssl.CERT_NONE)
    """Whether client certificate is required (see the stdlib ssl module)."""
    root_path: Optional[str] = None
    """FastAPI root_path when app is behind a path based routing proxy."""
    middleware: list[str] = field(default_factory=lambda: [])
    """Additional ASGI middleware to apply to the app. We accept multiple
    --middleware arguments. The value should be an import path. If a function
    is provided, vLLM will add it to the server using
    `@app.middleware('http')`. If a class is provided, vLLM will
    add it to the server using `app.add_middleware()`."""
    return_tokens_as_token_ids: bool = False
    """When `--max-logprobs` is specified, represents single tokens as
    strings of the form 'token_id:{token_id}' so that tokens that are not
    JSON-encodable can be identified."""
    disable_frontend_multiprocessing: bool = False
    """If specified, will run the OpenAI frontend server in the same process as
    the model serving engine."""
    enable_request_id_headers: bool = False
    """If specified, API server will add X-Request-Id header to responses.
    Caution: this hurts performance at high QPS."""
    enable_auto_tool_choice: bool = False
    """Enable auto tool choice for supported models. Use `--tool-call-parser`
    to specify which parser to use."""
    tool_call_parser: Optional[str] = None
    """Select the tool call parser depending on the model that you're using.
    This is used to parse the model-generated tool call into OpenAI API format.
    Required for `--enable-auto-tool-choice`. You can choose any option from
    the built-in parsers or register a plugin via `--tool-parser-plugin`."""
    tool_parser_plugin: str = ""
    """Name of the tool parser plugin used to parse model-generated tool calls
    into the OpenAI API format; parser names registered in this plugin can be
    selected with `--tool-call-parser`."""
    log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH
    """Path to logging config JSON file for both vllm and uvicorn."""
    max_log_len: Optional[int] = None
    """Max number of prompt characters or prompt ID numbers being printed in
    log. The default of None means unlimited."""
    disable_fastapi_docs: bool = False
    """Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."""
    enable_prompt_tokens_details: bool = False
    """If set to True, enable prompt_tokens_details in usage."""
    enable_server_load_tracking: bool = False
    """If set to True, enable tracking server_load_metrics in the app state."""
    enable_force_include_usage: bool = False
    """If set to True, include usage on every request."""
    expand_tools_even_if_tool_choice_none: bool = False
    """Include tool definitions in prompts even when `tool_choice='none'`.

    This is a transitional option that will be removed in v0.10.0. In
    v0.10.0, tool definitions will always be included regardless of the
    `tool_choice` setting. Use this flag to test the upcoming behavior
    before the breaking change."""
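
    # Illustrative sketch: FrontendArgs is a (pydantic) dataclass, so it can
    # also be constructed programmatically; field names mirror the CLI flags
    # with dashes replaced by underscores. The values below are hypothetical.
    #
    #     frontend_args = FrontendArgs(port=9000, api_key="secret-key")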

    @staticmethod
    def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
        from vllm.engine.arg_utils import get_kwargs

        frontend_kwargs = get_kwargs(FrontendArgs)

        # Special case: allowed_origins, allowed_methods, and allowed_headers
        # all need a json.loads type, and their nargs must be removed.
        frontend_kwargs["allowed_origins"]["type"] = json.loads
        frontend_kwargs["allowed_methods"]["type"] = json.loads
        frontend_kwargs["allowed_headers"]["type"] = json.loads
        del frontend_kwargs["allowed_origins"]["nargs"]
        del frontend_kwargs["allowed_methods"]["nargs"]
        del frontend_kwargs["allowed_headers"]["nargs"]

        # Special case: LoRA modules need custom parser action and
        # optional_type(str)
        frontend_kwargs["lora_modules"]["type"] = optional_type(str)
        frontend_kwargs["lora_modules"]["action"] = LoRAParserAction

        # Special case: Prompt adapters need custom parser action and
        # optional_type(str)
        frontend_kwargs["prompt_adapters"]["type"] = optional_type(str)
        frontend_kwargs["prompt_adapters"][
            "action"] = PromptAdapterParserAction

        # Special case: Middleware needs append action
        frontend_kwargs["middleware"]["action"] = "append"

        # Special case: Tool call parser shows built-in options.
        valid_tool_parsers = list(ToolParserManager.tool_parsers.keys())
        frontend_kwargs["tool_call_parser"]["choices"] = valid_tool_parsers

        # Special case for expand-tools-even-if-tool-choice-none because of
        # the deprecation field
        frontend_kwargs["expand_tools_even_if_tool_choice_none"][
            "deprecated"] = True

        frontend_group = parser.add_argument_group(
            title="Frontend",
            description=FrontendArgs.__doc__,
        )

        for key, value in frontend_kwargs.items():
            frontend_group.add_argument(f"--{key.replace('_', '-')}", **value)

        return parser
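
# Illustrative sketch, assuming vLLM's FlexibleArgumentParser: how the
# dataclass-driven registration above might be exercised. The flag values are
# hypothetical; note that the json.loads-typed flags take JSON literals.
#
#     parser = FrontendArgs.add_cli_args(FlexibleArgumentParser())
#     args = parser.parse_args(
#         ["--port", "9000", "--allowed-origins", '["http://localhost:3000"]'])
#     assert args.port == 9000
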
def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
    parser.add_argument("--host",
                        type=optional_type(str),
                        default=None,
                        help="Host name.")
    parser.add_argument("--port", type=int, default=8000, help="Port number.")
    parser.add_argument(
        "--uvicorn-log-level",
        type=str,
        default="info",
        choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'],
        help="Log level for uvicorn.")
    parser.add_argument("--disable-uvicorn-access-log",
                        action="store_true",
                        help="Disable uvicorn access log.")
    parser.add_argument("--allow-credentials",
                        action="store_true",
                        help="Allow credentials.")
    parser.add_argument("--allowed-origins",
                        type=json.loads,
                        default=["*"],
                        help="Allowed origins.")
    parser.add_argument("--allowed-methods",
                        type=json.loads,
                        default=["*"],
                        help="Allowed methods.")
    parser.add_argument("--allowed-headers",
                        type=json.loads,
                        default=["*"],
                        help="Allowed headers.")
    parser.add_argument("--api-key",
                        type=optional_type(str),
                        default=None,
                        help="If provided, the server will require this key "
                        "to be presented in the header.")
    parser.add_argument(
        "--lora-modules",
        type=optional_type(str),
        default=None,
        nargs='+',
        action=LoRAParserAction,
        help="LoRA module configurations in either 'name=path' format "
        "or JSON format. "
        "Example (old format): ``'name=path'`` "
        "Example (new format): "
        "``{\"name\": \"name\", \"path\": \"lora_path\", "
        "\"base_model_name\": \"id\"}``")
    parser.add_argument(
        "--prompt-adapters",
        type=optional_type(str),
        default=None,
        nargs='+',
        action=PromptAdapterParserAction,
        help="Prompt adapter configurations in the format name=path. "
        "Multiple adapters can be specified.")
    parser.add_argument("--chat-template",
                        type=optional_type(str),
                        default=None,
                        help="The file path to the chat template, "
                        "or the template in single-line form "
                        "for the specified model.")
    parser.add_argument(
        '--chat-template-content-format',
        type=str,
        default="auto",
        choices=get_args(ChatTemplateContentFormatOption),
        help='The format to render message content within a chat template.'
        '\n\n'
        '* "string" will render the content as a string. '
        'Example: ``"Hello World"``\n'
        '* "openai" will render the content as a list of dictionaries, '
        'similar to OpenAI schema. '
        'Example: ``[{"type": "text", "text": "Hello world!"}]``')
    parser.add_argument("--response-role",
                        type=optional_type(str),
                        default="assistant",
                        help="The role name to return if "
                        "``request.add_generation_prompt=true``.")
    parser.add_argument("--ssl-keyfile",
                        type=optional_type(str),
                        default=None,
                        help="The file path to the SSL key file.")
    parser.add_argument("--ssl-certfile",
                        type=optional_type(str),
                        default=None,
                        help="The file path to the SSL cert file.")
    parser.add_argument("--ssl-ca-certs",
                        type=optional_type(str),
                        default=None,
                        help="The CA certificates file.")
    parser.add_argument(
        "--enable-ssl-refresh",
        action="store_true",
        default=False,
        help="Refresh SSL Context when SSL certificate files change")
    parser.add_argument(
        "--ssl-cert-reqs",
        type=int,
        default=int(ssl.CERT_NONE),
        help="Whether client certificate is required (see the stdlib ssl "
        "module).")
    parser.add_argument(
        "--root-path",
        type=optional_type(str),
        default=None,
        help="FastAPI root_path when app is behind a path based routing proxy."
    )
    parser.add_argument(
        "--middleware",
        type=optional_type(str),
        action="append",
        default=[],
        help="Additional ASGI middleware to apply to the app. "
        "We accept multiple --middleware arguments. "
        "The value should be an import path. "
        "If a function is provided, vLLM will add it to the server "
        "using ``@app.middleware('http')``. "
        "If a class is provided, vLLM will add it to the server "
        "using ``app.add_middleware()``.")
    parser.add_argument(
        "--return-tokens-as-token-ids",
        action="store_true",
        help="When ``--max-logprobs`` is specified, represents single tokens "
        "as strings of the form 'token_id:{token_id}' so that tokens "
        "that are not JSON-encodable can be identified.")
    parser.add_argument(
        "--disable-frontend-multiprocessing",
        action="store_true",
        help="If specified, will run the OpenAI frontend server in the same "
        "process as the model serving engine.")
    parser.add_argument(
        "--enable-request-id-headers",
        action="store_true",
        help="If specified, API server will add X-Request-Id header to "
        "responses.")
    parser.add_argument(
        "--enable-auto-tool-choice",
        action="store_true",
        default=False,
        help="Enable auto tool choice for supported models. Use "
        "``--tool-call-parser`` to specify which parser to use.")
    parser.add_argument(
        "--expand-tools-even-if-tool-choice-none",
        action="store_true",
        default=False,
        deprecated=True,
        help="Include tool definitions in prompts "
        "even when tool_choice='none'. "
        "This is a transitional option that will be removed in v0.10.0. "
        "In v0.10.0, tool definitions will always be included regardless of "
        "tool_choice setting. Use this flag now to test the new behavior "
        "before the breaking change.")

    valid_tool_parsers = ToolParserManager.tool_parsers.keys()
    parser.add_argument(
        "--tool-call-parser",
        type=str,
        metavar="{" + ",".join(valid_tool_parsers) + "} or name registered in "
        "--tool-parser-plugin",
        default=None,
        help=
        "Select the tool call parser depending on the model that you're using."
        " This is used to parse the model-generated tool call into OpenAI API "
        "format. Required for ``--enable-auto-tool-choice``.")

    parser.add_argument(
        "--tool-parser-plugin",
        type=str,
        default="",
        help=
        "Name of the tool parser plugin used to parse model-generated tool "
        "calls into the OpenAI API format; parser names registered in this "
        "plugin can be selected with ``--tool-call-parser``.")

    parser.add_argument(
        "--log-config-file",
        type=str,
        default=envs.VLLM_LOGGING_CONFIG_PATH,
        help="Path to logging config JSON file for both vllm and uvicorn",
    )
"""Create the CLI argument parser used by the OpenAI API server.
|
||||
|
||||
We rely on the helper methods of `FrontendArgs` and `AsyncEngineArgs` to
|
||||
register all arguments instead of manually enumerating them here. This
|
||||
avoids code duplication and keeps the argument definitions in one place.
|
||||
"""
|
||||
parser = FrontendArgs.add_cli_args(parser)
|
||||
parser = AsyncEngineArgs.add_cli_args(parser)
|
||||
|
||||
    parser.add_argument('--max-log-len',
                        type=int,
                        default=None,
                        help='Max number of prompt characters or prompt '
                        'ID numbers being printed in log.'
                        ' The default of None means unlimited.')

    parser.add_argument(
        "--disable-fastapi-docs",
        action='store_true',
        default=False,
        help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."
    )
    parser.add_argument(
        "--enable-prompt-tokens-details",
        action='store_true',
        default=False,
        help="If set to True, enable prompt_tokens_details in usage.")
    parser.add_argument(
        "--enable-force-include-usage",
        action='store_true',
        default=False,
        help="If set to True, include usage on every request.")
    parser.add_argument(
        "--enable-server-load-tracking",
        action='store_true',
        default=False,
        help=
        "If set to True, enable tracking server_load_metrics in the app state."
    )

    return parser
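
# Illustrative sketch, assuming FlexibleArgumentParser from vllm.utils and the
# model name shown: end-to-end use of make_arg_parser, with the parsed
# namespace handed back to AsyncEngineArgs.
#
#     from vllm.utils import FlexibleArgumentParser
#     parser = make_arg_parser(FlexibleArgumentParser())
#     args = parser.parse_args(["--model", "facebook/opt-125m"])
#     engine_args = AsyncEngineArgs.from_cli_args(args)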