diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 1a52f0262d55a..a83a602d02221 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -395,8 +395,9 @@ class AsyncLLMEngine: async for request_output in stream: yield request_output - except Exception as e: - # If there is an exception, abort the request. + except (Exception, asyncio.CancelledError) as e: + # If there is an exception or coroutine is cancelled, abort the + # request. self._abort(request_id) raise e