[Feature] Support multiple api keys in server (#18548)

Signed-off-by: Yan Pashkovsky <yanp.bugz@gmail.com>
This commit is contained in:
Yan Pashkovsky 2025-07-30 15:03:23 +01:00 committed by GitHub
parent da3e0bd6e5
commit bf668b5bf5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 30 additions and 29 deletions

View File

@ -126,6 +126,7 @@ curl http://localhost:8000/v1/models
``` ```
You can pass in the argument `--api-key` or environment variable `VLLM_API_KEY` to enable the server to check for an API key in the header. You can pass in the argument `--api-key` or environment variable `VLLM_API_KEY` to enable the server to check for an API key in the header.
You can pass multiple keys after `--api-key`, and the server will accept any of them; this can be useful for key rotation.
### OpenAI Completions API with vLLM ### OpenAI Completions API with vLLM

View File

@ -1239,9 +1239,9 @@ class AuthenticationMiddleware:
2. The request path doesn't start with /v1 (e.g. /health). 2. The request path doesn't start with /v1 (e.g. /health).
""" """
def __init__(self, app: ASGIApp, api_token: str) -> None: def __init__(self, app: ASGIApp, tokens: list[str]) -> None:
self.app = app self.app = app
self.api_token = api_token self.api_tokens = {f"Bearer {token}" for token in tokens}
def __call__(self, scope: Scope, receive: Receive, def __call__(self, scope: Scope, receive: Receive,
send: Send) -> Awaitable[None]: send: Send) -> Awaitable[None]:
@ -1255,7 +1255,7 @@ class AuthenticationMiddleware:
headers = Headers(scope=scope) headers = Headers(scope=scope)
# Type narrow to satisfy mypy. # Type narrow to satisfy mypy.
if url_path.startswith("/v1") and headers.get( if url_path.startswith("/v1") and headers.get(
"Authorization") != f"Bearer {self.api_token}": "Authorization") not in self.api_tokens:
response = JSONResponse(content={"error": "Unauthorized"}, response = JSONResponse(content={"error": "Unauthorized"},
status_code=401) status_code=401)
return response(scope, receive, send) return response(scope, receive, send)
@ -1303,7 +1303,7 @@ class ScalingMiddleware:
""" """
Middleware that checks if the model is currently scaling and Middleware that checks if the model is currently scaling and
returns a 503 Service Unavailable response if it is. returns a 503 Service Unavailable response if it is.
This middleware applies to all HTTP requests and prevents This middleware applies to all HTTP requests and prevents
processing when the model is in a scaling state. processing when the model is in a scaling state.
""" """
@ -1512,8 +1512,8 @@ def build_app(args: Namespace) -> FastAPI:
status_code=HTTPStatus.BAD_REQUEST) status_code=HTTPStatus.BAD_REQUEST)
# Ensure --api-key option from CLI takes precedence over VLLM_API_KEY # Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
if token := args.api_key or envs.VLLM_API_KEY: if tokens := [key for key in (args.api_key or [envs.VLLM_API_KEY]) if key]:
app.add_middleware(AuthenticationMiddleware, api_token=token) app.add_middleware(AuthenticationMiddleware, tokens=tokens)
if args.enable_request_id_headers: if args.enable_request_id_headers:
app.add_middleware(XRequestIdMiddleware) app.add_middleware(XRequestIdMiddleware)

View File

@ -85,22 +85,22 @@ class FrontendArgs:
"""Allowed methods.""" """Allowed methods."""
allowed_headers: list[str] = field(default_factory=lambda: ["*"]) allowed_headers: list[str] = field(default_factory=lambda: ["*"])
"""Allowed headers.""" """Allowed headers."""
api_key: Optional[str] = None api_key: Optional[list[str]] = None
"""If provided, the server will require this key to be presented in the """If provided, the server will require one of these keys to be presented in
header.""" the header."""
lora_modules: Optional[list[LoRAModulePath]] = None lora_modules: Optional[list[LoRAModulePath]] = None
"""LoRA modules configurations in either 'name=path' format or JSON format """LoRA modules configurations in either 'name=path' format or JSON format
or JSON list format. Example (old format): `'name=path'` Example (new or JSON list format. Example (old format): `'name=path'` Example (new
format): `{\"name\": \"name\", \"path\": \"lora_path\", format): `{\"name\": \"name\", \"path\": \"lora_path\",
\"base_model_name\": \"id\"}`""" \"base_model_name\": \"id\"}`"""
chat_template: Optional[str] = None chat_template: Optional[str] = None
"""The file path to the chat template, or the template in single-line form """The file path to the chat template, or the template in single-line form
for the specified model.""" for the specified model."""
chat_template_content_format: ChatTemplateContentFormatOption = "auto" chat_template_content_format: ChatTemplateContentFormatOption = "auto"
"""The format to render message content within a chat template. """The format to render message content within a chat template.
* "string" will render the content as a string. Example: `"Hello World"` * "string" will render the content as a string. Example: `"Hello World"`
* "openai" will render the content as a list of dictionaries, similar to OpenAI * "openai" will render the content as a list of dictionaries, similar to OpenAI
schema. Example: `[{"type": "text", "text": "Hello world!"}]`""" schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
response_role: str = "assistant" response_role: str = "assistant"
"""The role name to return if `request.add_generation_prompt=true`.""" """The role name to return if `request.add_generation_prompt=true`."""
@ -117,40 +117,40 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
root_path: Optional[str] = None root_path: Optional[str] = None
"""FastAPI root_path when app is behind a path based routing proxy.""" """FastAPI root_path when app is behind a path based routing proxy."""
middleware: list[str] = field(default_factory=lambda: []) middleware: list[str] = field(default_factory=lambda: [])
"""Additional ASGI middleware to apply to the app. We accept multiple """Additional ASGI middleware to apply to the app. We accept multiple
--middleware arguments. The value should be an import path. If a function --middleware arguments. The value should be an import path. If a function
is provided, vLLM will add it to the server using is provided, vLLM will add it to the server using
`@app.middleware('http')`. If a class is provided, vLLM will `@app.middleware('http')`. If a class is provided, vLLM will
add it to the server using `app.add_middleware()`.""" add it to the server using `app.add_middleware()`."""
return_tokens_as_token_ids: bool = False return_tokens_as_token_ids: bool = False
"""When `--max-logprobs` is specified, represents single tokens as """When `--max-logprobs` is specified, represents single tokens as
strings of the form 'token_id:{token_id}' so that tokens that are not strings of the form 'token_id:{token_id}' so that tokens that are not
JSON-encodable can be identified.""" JSON-encodable can be identified."""
disable_frontend_multiprocessing: bool = False disable_frontend_multiprocessing: bool = False
"""If specified, will run the OpenAI frontend server in the same process as """If specified, will run the OpenAI frontend server in the same process as
the model serving engine.""" the model serving engine."""
enable_request_id_headers: bool = False enable_request_id_headers: bool = False
"""If specified, API server will add X-Request-Id header to responses. """If specified, API server will add X-Request-Id header to responses.
Caution: this hurts performance at high QPS.""" Caution: this hurts performance at high QPS."""
enable_auto_tool_choice: bool = False enable_auto_tool_choice: bool = False
"""Enable auto tool choice for supported models. Use `--tool-call-parser` """Enable auto tool choice for supported models. Use `--tool-call-parser`
to specify which parser to use.""" to specify which parser to use."""
exclude_tools_when_tool_choice_none: bool = False exclude_tools_when_tool_choice_none: bool = False
"""If specified, exclude tool definitions in prompts when """If specified, exclude tool definitions in prompts when
tool_choice='none'.""" tool_choice='none'."""
tool_call_parser: Optional[str] = None tool_call_parser: Optional[str] = None
"""Select the tool call parser depending on the model that you're using. """Select the tool call parser depending on the model that you're using.
This is used to parse the model-generated tool call into OpenAI API format. This is used to parse the model-generated tool call into OpenAI API format.
Required for `--enable-auto-tool-choice`. You can choose any option from Required for `--enable-auto-tool-choice`. You can choose any option from
the built-in parsers or register a plugin via `--tool-parser-plugin`.""" the built-in parsers or register a plugin via `--tool-parser-plugin`."""
tool_parser_plugin: str = "" tool_parser_plugin: str = ""
"""Specify the tool parser plugin written to parse model-generated tool calls """Specify the tool parser plugin written to parse model-generated tool calls
into OpenAI API format; the names registered in this plugin can be used in into OpenAI API format; the names registered in this plugin can be used in
`--tool-call-parser`.""" `--tool-call-parser`."""
log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH
"""Path to logging config JSON file for both vllm and uvicorn""" """Path to logging config JSON file for both vllm and uvicorn"""
max_log_len: Optional[int] = None max_log_len: Optional[int] = None
"""Max number of prompt characters or prompt ID numbers being printed in """Max number of prompt characters or prompt ID numbers being printed in
log. The default of None means unlimited.""" log. The default of None means unlimited."""
disable_fastapi_docs: bool = False disable_fastapi_docs: bool = False
"""Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint.""" """Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."""