mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-03 10:31:19 +08:00
[1/N] API Server (Remove Proxy) (#11529)
This commit is contained in:
parent
b85a977822
commit
720b10fdc6
@ -585,12 +585,18 @@ def build_app(args: Namespace) -> FastAPI:
|
|||||||
status_code=401)
|
status_code=401)
|
||||||
return await call_next(request)
|
return await call_next(request)
|
||||||
|
|
||||||
@app.middleware("http")
|
if args.enable_request_id_headers:
|
||||||
async def add_request_id(request: Request, call_next):
|
logger.warning(
|
||||||
request_id = request.headers.get("X-Request-Id") or uuid.uuid4().hex
|
"CAUTION: Enabling X-Request-Id headers in the API Server. "
|
||||||
response = await call_next(request)
|
"This can harm performance at high QPS.")
|
||||||
response.headers["X-Request-Id"] = request_id
|
|
||||||
return response
|
@app.middleware("http")
|
||||||
|
async def add_request_id(request: Request, call_next):
|
||||||
|
request_id = request.headers.get(
|
||||||
|
"X-Request-Id") or uuid.uuid4().hex
|
||||||
|
response = await call_next(request)
|
||||||
|
response.headers["X-Request-Id"] = request_id
|
||||||
|
return response
|
||||||
|
|
||||||
for middleware in args.middleware:
|
for middleware in args.middleware:
|
||||||
module_path, object_name = middleware.rsplit(".", 1)
|
module_path, object_name = middleware.rsplit(".", 1)
|
||||||
|
|||||||
@ -196,7 +196,11 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="If specified, will run the OpenAI frontend server in the same "
|
help="If specified, will run the OpenAI frontend server in the same "
|
||||||
"process as the model serving engine.")
|
"process as the model serving engine.")
|
||||||
|
parser.add_argument(
|
||||||
|
"--enable-request-id-headers",
|
||||||
|
action="store_true",
|
||||||
|
help="If specified, API server will add X-Request-Id header to "
|
||||||
|
"responses. Caution: this hurts performance at high QPS.")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--enable-auto-tool-choice",
|
"--enable-auto-tool-choice",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user