diff --git a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py index e2b709ceadba..58ea2e229125 100644 --- a/vllm/entrypoints/api_server.py +++ b/vllm/entrypoints/api_server.py @@ -3,7 +3,7 @@ import json from typing import AsyncGenerator from fastapi import BackgroundTasks, FastAPI, Request -from fastapi.responses import Response, StreamingResponse +from fastapi.responses import JSONResponse, Response, StreamingResponse import uvicorn from vllm.engine.arg_utils import AsyncEngineArgs @@ -64,7 +64,7 @@ async def generate(request: Request) -> Response: prompt = final_output.prompt text_outputs = [prompt + output.text for output in final_output.outputs] ret = {"text": text_outputs} - return Response(content=json.dumps(ret)) + return JSONResponse(ret) if __name__ == "__main__":