From d88bff1b96c6f4c8abbd3d5ab4758bdc040f7b62 Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Sat, 9 Nov 2024 19:18:29 +0900 Subject: [PATCH] [Frontend] add `add_request_id` middleware (#9594) Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- .../serving/openai_compatible_server.md | 26 +++++++++++++++++++ vllm/entrypoints/openai/api_server.py | 8 ++++++ 2 files changed, 34 insertions(+) diff --git a/docs/source/serving/openai_compatible_server.md b/docs/source/serving/openai_compatible_server.md index a196f8b1e574e..9b29ca66022cb 100644 --- a/docs/source/serving/openai_compatible_server.md +++ b/docs/source/serving/openai_compatible_server.md @@ -62,6 +62,32 @@ completion = client.chat.completions.create( ) ``` +### Extra HTTP Headers + +Only `X-Request-Id` HTTP request header is supported for now. + +```python +completion = client.chat.completions.create( + model="NousResearch/Meta-Llama-3-8B-Instruct", + messages=[ + {"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"} + ], + extra_headers={ + "x-request-id": "sentiment-classification-00001", + } +) +print(completion._request_id) + +completion = client.completions.create( + model="NousResearch/Meta-Llama-3-8B-Instruct", + prompt="A robot may not injure a human being", + extra_headers={ + "x-request-id": "completion-test", + } +) +print(completion._request_id) +``` + ### Extra Parameters for Completions API The following [sampling parameters (click through to see documentation)](../dev/sampling_params.rst) are supported. diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 917b347ff1161..b8b7912742d45 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -7,6 +7,7 @@ import re import signal import socket import tempfile +import uuid from argparse import Namespace from contextlib import asynccontextmanager from functools import partial @@ -475,6 +476,13 @@ def build_app(args: Namespace) -> FastAPI: status_code=401) return await call_next(request) + @app.middleware("http") + async def add_request_id(request: Request, call_next): + request_id = request.headers.get("X-Request-Id") or uuid.uuid4().hex + response = await call_next(request) + response.headers["X-Request-Id"] = request_id + return response + for middleware in args.middleware: module_path, object_name = middleware.rsplit(".", 1) imported = getattr(importlib.import_module(module_path), object_name)