mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-08 07:29:08 +08:00
[Frontend] add add_request_id middleware (#9594)
Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com>
This commit is contained in:
parent
9e37266420
commit
d88bff1b96
@ -62,6 +62,32 @@ completion = client.chat.completions.create(
|
|||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Extra HTTP Headers
|
||||||
|
|
||||||
|
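Only the `X-Request-Id` HTTP request header is supported for now. If the request includes it, the server returns the same value in the `X-Request-Id` response header; otherwise the server generates a random ID.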
|
||||||
|
|
||||||
|
```python
|
||||||
|
completion = client.chat.completions.create(
|
||||||
|
model="NousResearch/Meta-Llama-3-8B-Instruct",
|
||||||
|
messages=[
|
||||||
|
{"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"}
|
||||||
|
],
|
||||||
|
extra_headers={
|
||||||
|
"x-request-id": "sentiment-classification-00001",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
print(completion._request_id)
|
||||||
|
|
||||||
|
completion = client.completions.create(
|
||||||
|
model="NousResearch/Meta-Llama-3-8B-Instruct",
|
||||||
|
prompt="A robot may not injure a human being",
|
||||||
|
extra_headers={
|
||||||
|
"x-request-id": "completion-test",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
print(completion._request_id)
|
||||||
|
```
|
||||||
|
|
||||||
### Extra Parameters for Completions API
|
### Extra Parameters for Completions API
|
||||||
|
|
||||||
The following [sampling parameters (click through to see documentation)](../dev/sampling_params.rst) are supported.
|
The following [sampling parameters (click through to see documentation)](../dev/sampling_params.rst) are supported.
|
||||||
|
|||||||
@ -7,6 +7,7 @@ import re
|
|||||||
import signal
|
import signal
|
||||||
import socket
|
import socket
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import uuid
|
||||||
from argparse import Namespace
|
from argparse import Namespace
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from functools import partial
|
from functools import partial
|
||||||
@ -475,6 +476,13 @@ def build_app(args: Namespace) -> FastAPI:
|
|||||||
status_code=401)
|
status_code=401)
|
||||||
return await call_next(request)
|
return await call_next(request)
|
||||||
|
|
||||||
|
@app.middleware("http")
|
||||||
|
async def add_request_id(request: Request, call_next):
|
||||||
|
request_id = request.headers.get("X-Request-Id") or uuid.uuid4().hex
|
||||||
|
response = await call_next(request)
|
||||||
|
response.headers["X-Request-Id"] = request_id
|
||||||
|
return response
|
||||||
|
|
||||||
for middleware in args.middleware:
|
for middleware in args.middleware:
|
||||||
module_path, object_name = middleware.rsplit(".", 1)
|
module_path, object_name = middleware.rsplit(".", 1)
|
||||||
imported = getattr(importlib.import_module(module_path), object_name)
|
imported = getattr(importlib.import_module(module_path), object_name)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user