diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 2a61259896a37..1e7d9eb83b9af 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -1098,9 +1098,10 @@ async def run_server(args, **uvicorn_kwargs) -> None: ) # NB: Await server shutdown only after the backend context is exited - await shutdown_task - - sock.close() + try: + await shutdown_task + finally: + sock.close() if __name__ == "__main__":