From b396cb49982e740276b5dbd8c99b2f5a5d07aab0 Mon Sep 17 00:00:00 2001
From: Ricardo Lu <37237570+gesanqiu@users.noreply.github.com>
Date: Fri, 7 Jul 2023 09:08:40 +0800
Subject: [PATCH] fix: only response [DONE] once when streaming response.
 (#378)

---
 vllm/entrypoints/openai/api_server.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index b1a751d6e7f0..8fe664bb9400 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -269,7 +269,7 @@ async def create_chat_completion(raw_request: Request):
                         finish_reason=output.finish_reason,
                     )
                     yield f"data: {response_json}\n\n"
-                    yield "data: [DONE]\n\n"
+        yield "data: [DONE]\n\n"
 
     # Streaming response
     if request.stream:
@@ -465,7 +465,7 @@ async def create_completion(raw_request: Request):
                         finish_reason=output.finish_reason,
                     )
                     yield f"data: {response_json}\n\n"
-                    yield "data: [DONE]\n\n"
+        yield "data: [DONE]\n\n"
 
     # Streaming response
     if stream: