mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-20 02:26:59 +08:00
[Feature] Support multiple api keys in server (#18548)
Signed-off-by: Yan Pashkovsky <yanp.bugz@gmail.com>
This commit is contained in:
parent
da3e0bd6e5
commit
bf668b5bf5
@ -126,6 +126,7 @@ curl http://localhost:8000/v1/models
|
|||||||
```
|
```
|
||||||
|
|
||||||
You can pass in the argument `--api-key` or environment variable `VLLM_API_KEY` to enable the server to check for an API key in the header.
|
You can pass in the argument `--api-key` or environment variable `VLLM_API_KEY` to enable the server to check for an API key in the header.
|
||||||
|
You can pass multiple keys after `--api-key`, and the server will accept any of the keys passed; this can be useful for key rotation.
|
||||||
|
|
||||||
### OpenAI Completions API with vLLM
|
### OpenAI Completions API with vLLM
|
||||||
|
|
||||||
|
|||||||
@ -1239,9 +1239,9 @@ class AuthenticationMiddleware:
|
|||||||
2. The request path doesn't start with /v1 (e.g. /health).
|
2. The request path doesn't start with /v1 (e.g. /health).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, app: ASGIApp, api_token: str) -> None:
|
def __init__(self, app: ASGIApp, tokens: list[str]) -> None:
|
||||||
self.app = app
|
self.app = app
|
||||||
self.api_token = api_token
|
self.api_tokens = {f"Bearer {token}" for token in tokens}
|
||||||
|
|
||||||
def __call__(self, scope: Scope, receive: Receive,
|
def __call__(self, scope: Scope, receive: Receive,
|
||||||
send: Send) -> Awaitable[None]:
|
send: Send) -> Awaitable[None]:
|
||||||
@ -1255,7 +1255,7 @@ class AuthenticationMiddleware:
|
|||||||
headers = Headers(scope=scope)
|
headers = Headers(scope=scope)
|
||||||
# Type narrow to satisfy mypy.
|
# Type narrow to satisfy mypy.
|
||||||
if url_path.startswith("/v1") and headers.get(
|
if url_path.startswith("/v1") and headers.get(
|
||||||
"Authorization") != f"Bearer {self.api_token}":
|
"Authorization") not in self.api_tokens:
|
||||||
response = JSONResponse(content={"error": "Unauthorized"},
|
response = JSONResponse(content={"error": "Unauthorized"},
|
||||||
status_code=401)
|
status_code=401)
|
||||||
return response(scope, receive, send)
|
return response(scope, receive, send)
|
||||||
@ -1303,7 +1303,7 @@ class ScalingMiddleware:
|
|||||||
"""
|
"""
|
||||||
Middleware that checks if the model is currently scaling and
|
Middleware that checks if the model is currently scaling and
|
||||||
returns a 503 Service Unavailable response if it is.
|
returns a 503 Service Unavailable response if it is.
|
||||||
|
|
||||||
This middleware applies to all HTTP requests and prevents
|
This middleware applies to all HTTP requests and prevents
|
||||||
processing when the model is in a scaling state.
|
processing when the model is in a scaling state.
|
||||||
"""
|
"""
|
||||||
@ -1512,8 +1512,8 @@ def build_app(args: Namespace) -> FastAPI:
|
|||||||
status_code=HTTPStatus.BAD_REQUEST)
|
status_code=HTTPStatus.BAD_REQUEST)
|
||||||
|
|
||||||
# Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
|
# Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
|
||||||
if token := args.api_key or envs.VLLM_API_KEY:
|
if tokens := [key for key in (args.api_key or [envs.VLLM_API_KEY]) if key]:
|
||||||
app.add_middleware(AuthenticationMiddleware, api_token=token)
|
app.add_middleware(AuthenticationMiddleware, tokens=tokens)
|
||||||
|
|
||||||
if args.enable_request_id_headers:
|
if args.enable_request_id_headers:
|
||||||
app.add_middleware(XRequestIdMiddleware)
|
app.add_middleware(XRequestIdMiddleware)
|
||||||
|
|||||||
@ -85,22 +85,22 @@ class FrontendArgs:
|
|||||||
"""Allowed methods."""
|
"""Allowed methods."""
|
||||||
allowed_headers: list[str] = field(default_factory=lambda: ["*"])
|
allowed_headers: list[str] = field(default_factory=lambda: ["*"])
|
||||||
"""Allowed headers."""
|
"""Allowed headers."""
|
||||||
api_key: Optional[str] = None
|
api_key: Optional[list[str]] = None
|
||||||
"""If provided, the server will require this key to be presented in the
|
"""If provided, the server will require one of these keys to be presented in
|
||||||
header."""
|
the header."""
|
||||||
lora_modules: Optional[list[LoRAModulePath]] = None
|
lora_modules: Optional[list[LoRAModulePath]] = None
|
||||||
"""LoRA modules configurations in either 'name=path' format or JSON format
|
"""LoRA modules configurations in either 'name=path' format or JSON format
|
||||||
or JSON list format. Example (old format): `'name=path'` Example (new
|
or JSON list format. Example (old format): `'name=path'` Example (new
|
||||||
format): `{\"name\": \"name\", \"path\": \"lora_path\",
|
format): `{\"name\": \"name\", \"path\": \"lora_path\",
|
||||||
\"base_model_name\": \"id\"}`"""
|
\"base_model_name\": \"id\"}`"""
|
||||||
chat_template: Optional[str] = None
|
chat_template: Optional[str] = None
|
||||||
"""The file path to the chat template, or the template in single-line form
|
"""The file path to the chat template, or the template in single-line form
|
||||||
for the specified model."""
|
for the specified model."""
|
||||||
chat_template_content_format: ChatTemplateContentFormatOption = "auto"
|
chat_template_content_format: ChatTemplateContentFormatOption = "auto"
|
||||||
"""The format to render message content within a chat template.
|
"""The format to render message content within a chat template.
|
||||||
|
|
||||||
* "string" will render the content as a string. Example: `"Hello World"`
|
* "string" will render the content as a string. Example: `"Hello World"`
|
||||||
* "openai" will render the content as a list of dictionaries, similar to OpenAI
|
* "openai" will render the content as a list of dictionaries, similar to OpenAI
|
||||||
schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
|
schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
|
||||||
response_role: str = "assistant"
|
response_role: str = "assistant"
|
||||||
"""The role name to return if `request.add_generation_prompt=true`."""
|
"""The role name to return if `request.add_generation_prompt=true`."""
|
||||||
@ -117,40 +117,40 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
|
|||||||
root_path: Optional[str] = None
|
root_path: Optional[str] = None
|
||||||
"""FastAPI root_path when app is behind a path based routing proxy."""
|
"""FastAPI root_path when app is behind a path based routing proxy."""
|
||||||
middleware: list[str] = field(default_factory=lambda: [])
|
middleware: list[str] = field(default_factory=lambda: [])
|
||||||
"""Additional ASGI middleware to apply to the app. We accept multiple
|
"""Additional ASGI middleware to apply to the app. We accept multiple
|
||||||
--middleware arguments. The value should be an import path. If a function
|
--middleware arguments. The value should be an import path. If a function
|
||||||
is provided, vLLM will add it to the server using
|
is provided, vLLM will add it to the server using
|
||||||
`@app.middleware('http')`. If a class is provided, vLLM will
|
`@app.middleware('http')`. If a class is provided, vLLM will
|
||||||
add it to the server using `app.add_middleware()`."""
|
add it to the server using `app.add_middleware()`."""
|
||||||
return_tokens_as_token_ids: bool = False
|
return_tokens_as_token_ids: bool = False
|
||||||
"""When `--max-logprobs` is specified, represents single tokens as
|
"""When `--max-logprobs` is specified, represents single tokens as
|
||||||
strings of the form 'token_id:{token_id}' so that tokens that are not
|
strings of the form 'token_id:{token_id}' so that tokens that are not
|
||||||
JSON-encodable can be identified."""
|
JSON-encodable can be identified."""
|
||||||
disable_frontend_multiprocessing: bool = False
|
disable_frontend_multiprocessing: bool = False
|
||||||
"""If specified, will run the OpenAI frontend server in the same process as
|
"""If specified, will run the OpenAI frontend server in the same process as
|
||||||
the model serving engine."""
|
the model serving engine."""
|
||||||
enable_request_id_headers: bool = False
|
enable_request_id_headers: bool = False
|
||||||
"""If specified, API server will add X-Request-Id header to responses.
|
"""If specified, API server will add X-Request-Id header to responses.
|
||||||
Caution: this hurts performance at high QPS."""
|
Caution: this hurts performance at high QPS."""
|
||||||
enable_auto_tool_choice: bool = False
|
enable_auto_tool_choice: bool = False
|
||||||
"""Enable auto tool choice for supported models. Use `--tool-call-parser`
|
"""Enable auto tool choice for supported models. Use `--tool-call-parser`
|
||||||
to specify which parser to use."""
|
to specify which parser to use."""
|
||||||
exclude_tools_when_tool_choice_none: bool = False
|
exclude_tools_when_tool_choice_none: bool = False
|
||||||
"""If specified, exclude tool definitions in prompts when
|
"""If specified, exclude tool definitions in prompts when
|
||||||
tool_choice='none'."""
|
tool_choice='none'."""
|
||||||
tool_call_parser: Optional[str] = None
|
tool_call_parser: Optional[str] = None
|
||||||
"""Select the tool call parser depending on the model that you're using.
|
"""Select the tool call parser depending on the model that you're using.
|
||||||
This is used to parse the model-generated tool call into OpenAI API format.
|
This is used to parse the model-generated tool call into OpenAI API format.
|
||||||
Required for `--enable-auto-tool-choice`. You can choose any option from
|
Required for `--enable-auto-tool-choice`. You can choose any option from
|
||||||
the built-in parsers or register a plugin via `--tool-parser-plugin`."""
|
the built-in parsers or register a plugin via `--tool-parser-plugin`."""
|
||||||
tool_parser_plugin: str = ""
|
tool_parser_plugin: str = ""
|
||||||
"""Specify the tool parser plugin used to parse model-generated tool calls
|
"""Specify the tool parser plugin used to parse model-generated tool calls
|
||||||
into OpenAI API format. The names registered in this plugin can be used in
|
into OpenAI API format. The names registered in this plugin can be used in
|
||||||
`--tool-call-parser`."""
|
`--tool-call-parser`."""
|
||||||
log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH
|
log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH
|
||||||
"""Path to logging config JSON file for both vllm and uvicorn"""
|
"""Path to logging config JSON file for both vllm and uvicorn"""
|
||||||
max_log_len: Optional[int] = None
|
max_log_len: Optional[int] = None
|
||||||
"""Max number of prompt characters or prompt ID numbers being printed in
|
"""Max number of prompt characters or prompt ID numbers being printed in
|
||||||
log. The default of None means unlimited."""
|
log. The default of None means unlimited."""
|
||||||
disable_fastapi_docs: bool = False
|
disable_fastapi_docs: bool = False
|
||||||
"""Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."""
|
"""Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."""
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user