diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 374e43fb15341..1e735da641df9 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -24,6 +24,7 @@ from fastapi import APIRouter, Depends, FastAPI, Form, HTTPException, Request
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, Response, StreamingResponse
+from starlette.concurrency import iterate_in_threadpool
 from starlette.datastructures import State
 from starlette.routing import Mount
 from typing_extensions import assert_never
@@ -846,6 +847,29 @@ def build_app(args: Namespace) -> FastAPI:
             response.headers["X-Request-Id"] = request_id
             return response
 
+    if envs.VLLM_DEBUG_LOG_API_SERVER_RESPONSE:
+        logger.warning("CAUTION: Enabling log response in the API Server. "
+                       "This can include sensitive information and should be "
+                       "avoided in production.")
+
+        @app.middleware("http")
+        async def log_response(request: Request, call_next):
+            """Log the complete response body, then replay it downstream."""
+            response = await call_next(request)
+            # Drain the body so it can be logged. Every section is held in
+            # memory here before the response is handed back.
+            response_body = [
+                section async for section in response.body_iterator
+            ]
+            # The original iterator is exhausted now; install a fresh one
+            # over the buffered sections so the body can still be sent.
+            response.body_iterator = iterate_in_threadpool(iter(response_body))
+            # Join all sections instead of indexing the first one: the body
+            # may be empty, split across chunks, or not valid UTF-8.
+            logger.info("response_body={%s}",
+                        b"".join(response_body).decode(errors="replace"))
+            return response
+
     for middleware in args.middleware:
         module_path, object_name = middleware.rsplit(".", 1)
         imported = getattr(importlib.import_module(module_path), object_name)
diff --git a/vllm/envs.py b/vllm/envs.py
index 23c304f124d36..e5025485a2501 100644
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -270,6 +270,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_API_KEY":
     lambda: os.environ.get("VLLM_API_KEY", None),
 
+    # Whether to log responses from API Server for debugging
+    "VLLM_DEBUG_LOG_API_SERVER_RESPONSE":
+    lambda: os.environ.get("VLLM_DEBUG_LOG_API_SERVER_RESPONSE", "False").
+    lower() == "true",
+
     # S3 access information, used for tensorizer to load model from S3
     "S3_ACCESS_KEY_ID":
     lambda: os.environ.get("S3_ACCESS_KEY_ID", None),