[Feature] Support multiple api keys in server (#18548)

Signed-off-by: Yan Pashkovsky <yanp.bugz@gmail.com>
Author: Yan Pashkovsky, 2025-07-30 15:03:23 +01:00, committed by GitHub
parent da3e0bd6e5
commit bf668b5bf5
3 changed files with 30 additions and 29 deletions


@@ -126,6 +126,7 @@ curl http://localhost:8000/v1/models
 ```
 
 You can pass in the argument `--api-key` or environment variable `VLLM_API_KEY` to enable the server to check for API key in the header.
+You can pass multiple keys after `--api-key`, and the server will accept any of them; this can be useful for key rotation.
 
 ### OpenAI Completions API with vLLM
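As a usage sketch (assuming a server started with placeholder keys, e.g. `vllm serve <model> --api-key key-a key-b`), any one of the configured keys authenticates a request:

```python
# Minimal sketch; "key-b" is a placeholder for one of the keys the
# server was started with, not a value taken from this PR.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="key-b")
print(client.models.list())
```

During rotation, start the server accepting both the old and the new key, move clients over to the new key, then restart with only the new key.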


@@ -1239,9 +1239,9 @@ class AuthenticationMiddleware:
     2. The request path doesn't start with /v1 (e.g. /health).
     """
 
-    def __init__(self, app: ASGIApp, api_token: str) -> None:
+    def __init__(self, app: ASGIApp, tokens: list[str]) -> None:
         self.app = app
-        self.api_token = api_token
+        self.api_tokens = {f"Bearer {token}" for token in tokens}
 
     def __call__(self, scope: Scope, receive: Receive,
                  send: Send) -> Awaitable[None]:
@@ -1255,7 +1255,7 @@
         headers = Headers(scope=scope)
         # Type narrow to satisfy mypy.
         if url_path.startswith("/v1") and headers.get(
-                "Authorization") != f"Bearer {self.api_token}":
+                "Authorization") not in self.api_tokens:
             response = JSONResponse(content={"error": "Unauthorized"},
                                     status_code=401)
             return response(scope, receive, send)
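The constructor change pre-computes the full `Bearer <token>` strings into a set, so the per-request check is a single membership test rather than one string comparison per key; a missing `Authorization` header yields `None`, which is never in the set, so the check fails closed. A standalone sketch of the same logic, with placeholder keys and no ASGI plumbing:

```python
from typing import Optional

# Placeholder keys standing in for the values passed via --api-key.
api_tokens = {f"Bearer {token}" for token in ["key-a", "key-b"]}

def authorized(authorization: Optional[str]) -> bool:
    # Mirrors the middleware: accept iff the header matches a key exactly.
    return authorization in api_tokens

assert authorized("Bearer key-a")
assert authorized("Bearer key-b")
assert not authorized("Bearer key-c")  # unknown key
assert not authorized(None)            # missing Authorization header
```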
@@ -1303,7 +1303,7 @@ class ScalingMiddleware:
     """
     Middleware that checks if the model is currently scaling and
     returns a 503 Service Unavailable response if it is.
-
+
     This middleware applies to all HTTP requests and prevents
     processing when the model is in a scaling state.
     """
@@ -1512,8 +1512,8 @@ def build_app(args: Namespace) -> FastAPI:
                             status_code=HTTPStatus.BAD_REQUEST)
 
     # Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
-    if token := args.api_key or envs.VLLM_API_KEY:
-        app.add_middleware(AuthenticationMiddleware, api_token=token)
+    if tokens := [key for key in (args.api_key or [envs.VLLM_API_KEY]) if key]:
+        app.add_middleware(AuthenticationMiddleware, tokens=tokens)
 
     if args.enable_request_id_headers:
         app.add_middleware(XRequestIdMiddleware)
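The rewritten guard keeps the CLI-over-environment precedence noted in the comment: if `--api-key` supplied any keys they are used, otherwise `VLLM_API_KEY` is wrapped in a single-element list; falsy entries are filtered out, and an empty result (both unset) skips the middleware entirely, leaving auth disabled as before. A standalone sketch of the three cases, with `args_api_key`/`env_key` standing in for `args.api_key`/`envs.VLLM_API_KEY`:

```python
from typing import Optional

def resolve_tokens(args_api_key: Optional[list[str]],
                   env_key: Optional[str]) -> list[str]:
    # Same expression as in build_app: CLI keys win, the env var is the
    # fallback, and None/empty values are dropped.
    return [key for key in (args_api_key or [env_key]) if key]

assert resolve_tokens(["k1", "k2"], "env-k") == ["k1", "k2"]  # CLI wins
assert resolve_tokens(None, "env-k") == ["env-k"]             # env fallback
assert resolve_tokens(None, None) == []                       # auth stays off
```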


@@ -85,22 +85,22 @@ class FrontendArgs:
     """Allowed methods."""
     allowed_headers: list[str] = field(default_factory=lambda: ["*"])
     """Allowed headers."""
-    api_key: Optional[str] = None
-    """If provided, the server will require this key to be presented in the
-    header."""
+    api_key: Optional[list[str]] = None
+    """If provided, the server will require one of these keys to be presented
+    in the header."""
     lora_modules: Optional[list[LoRAModulePath]] = None
     """LoRA modules configurations in either 'name=path' format or JSON format
-    or JSON list format. Example (old format): `'name=path'` Example (new
-    format): `{\"name\": \"name\", \"path\": \"lora_path\",
+    or JSON list format. Example (old format): `'name=path'` Example (new
+    format): `{\"name\": \"name\", \"path\": \"lora_path\",
     \"base_model_name\": \"id\"}`"""
     chat_template: Optional[str] = None
-    """The file path to the chat template, or the template in single-line form
+    """The file path to the chat template, or the template in single-line form
     for the specified model."""
     chat_template_content_format: ChatTemplateContentFormatOption = "auto"
     """The format to render message content within a chat template.
     * "string" will render the content as a string. Example: `"Hello World"`
-    * "openai" will render the content as a list of dictionaries, similar to OpenAI
+    * "openai" will render the content as a list of dictionaries, similar to OpenAI
     schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
     response_role: str = "assistant"
     """The role name to return if `request.add_generation_prompt=true`."""
@@ -117,40 +117,40 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
     root_path: Optional[str] = None
     """FastAPI root_path when app is behind a path based routing proxy."""
     middleware: list[str] = field(default_factory=lambda: [])
-    """Additional ASGI middleware to apply to the app. We accept multiple
-    --middleware arguments. The value should be an import path. If a function
-    is provided, vLLM will add it to the server using
-    `@app.middleware('http')`. If a class is provided, vLLM will
+    """Additional ASGI middleware to apply to the app. We accept multiple
+    --middleware arguments. The value should be an import path. If a function
+    is provided, vLLM will add it to the server using
+    `@app.middleware('http')`. If a class is provided, vLLM will
     add it to the server using `app.add_middleware()`."""
     return_tokens_as_token_ids: bool = False
-    """When `--max-logprobs` is specified, represents single tokens as
-    strings of the form 'token_id:{token_id}' so that tokens that are not
+    """When `--max-logprobs` is specified, represents single tokens as
+    strings of the form 'token_id:{token_id}' so that tokens that are not
     JSON-encodable can be identified."""
     disable_frontend_multiprocessing: bool = False
-    """If specified, will run the OpenAI frontend server in the same process as
+    """If specified, will run the OpenAI frontend server in the same process as
     the model serving engine."""
     enable_request_id_headers: bool = False
-    """If specified, API server will add X-Request-Id header to responses.
+    """If specified, API server will add X-Request-Id header to responses.
     Caution: this hurts performance at high QPS."""
     enable_auto_tool_choice: bool = False
-    """Enable auto tool choice for supported models. Use `--tool-call-parser`
+    """Enable auto tool choice for supported models. Use `--tool-call-parser`
     to specify which parser to use."""
     exclude_tools_when_tool_choice_none: bool = False
-    """If specified, exclude tool definitions in prompts when
+    """If specified, exclude tool definitions in prompts when
     tool_choice='none'."""
     tool_call_parser: Optional[str] = None
-    """Select the tool call parser depending on the model that you're using.
-    This is used to parse the model-generated tool call into OpenAI API format.
-    Required for `--enable-auto-tool-choice`. You can choose any option from
+    """Select the tool call parser depending on the model that you're using.
+    This is used to parse the model-generated tool call into OpenAI API format.
+    Required for `--enable-auto-tool-choice`. You can choose any option from
     the built-in parsers or register a plugin via `--tool-parser-plugin`."""
     tool_parser_plugin: str = ""
-    """Specify the tool parser plugin used to parse model-generated tool
-    calls into OpenAI API format; the names registered by this plugin can be
+    """Specify the tool parser plugin used to parse model-generated tool
+    calls into OpenAI API format; the names registered by this plugin can be
     used in `--tool-call-parser`."""
     log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH
     """Path to logging config JSON file for both vllm and uvicorn"""
     max_log_len: Optional[int] = None
-    """Max number of prompt characters or prompt ID numbers being printed in
+    """Max number of prompt characters or prompt ID numbers being printed in
     log. The default of None means unlimited."""
     disable_fastapi_docs: bool = False
     """Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."""