Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-14 06:55:01 +08:00)
[BugFix] Don't cancel asyncio tasks directly from destructors (#22476)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent 904063907c
commit ccdae737a0
@@ -687,20 +687,31 @@ class AsyncMicrobatchTokenizer:
|
|||||||
max_length = kwargs.get("max_length")
|
max_length = kwargs.get("max_length")
|
||||||
|
|
||||||
if not truncation:
|
if not truncation:
|
||||||
return ("encode", add_special_tokens, False, None)
|
return "encode", add_special_tokens, False, None
|
||||||
|
|
||||||
model_max = getattr(self.tokenizer, "model_max_length", None)
|
model_max = getattr(self.tokenizer, "model_max_length", None)
|
||||||
if max_length is None or (model_max is not None
|
if max_length is None or (model_max is not None
|
||||||
and max_length == model_max):
|
and max_length == model_max):
|
||||||
return ("encode", add_special_tokens, True, "model_max")
|
return "encode", add_special_tokens, True, "model_max"
|
||||||
|
|
||||||
return ("encode", "other")
|
return "encode", "other"
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
for task in self._batcher_tasks:
|
if ((tasks := getattr(self, "_batcher_tasks", None))
|
||||||
if not task.done():
|
and (loop := getattr(self, "_loop", None))
|
||||||
|
and not loop.is_closed()):
|
||||||
|
|
||||||
|
def cancel_tasks():
|
||||||
|
for task in tasks:
|
||||||
task.cancel()
|
task.cancel()
|
||||||
|
|
||||||
|
loop.call_soon_threadsafe(cancel_tasks)
|
||||||
|
|
||||||
|
|
||||||
|
def cancel_task_threadsafe(task: Task):
|
||||||
|
if task and not task.done() and not (loop := task.get_loop()).is_closed():
|
||||||
|
loop.call_soon_threadsafe(task.cancel)
|
||||||
|
|
||||||
|
|
||||||
def make_async(
|
def make_async(
|
||||||
func: Callable[P, T],
|
func: Callable[P, T],
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ from vllm.transformers_utils.config import (
|
|||||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||||
from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
|
from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
from vllm.utils import Device, cdiv, deprecate_kwargs
|
from vllm.utils import Device, cancel_task_threadsafe, cdiv, deprecate_kwargs
|
||||||
from vllm.v1.engine import EngineCoreRequest
|
from vllm.v1.engine import EngineCoreRequest
|
||||||
from vllm.v1.engine.core_client import EngineCoreClient
|
from vllm.v1.engine.core_client import EngineCoreClient
|
||||||
from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
|
from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
|
||||||
@@ -219,8 +219,7 @@ class AsyncLLM(EngineClient):
|
|||||||
if engine_core := getattr(self, "engine_core", None):
|
if engine_core := getattr(self, "engine_core", None):
|
||||||
engine_core.shutdown()
|
engine_core.shutdown()
|
||||||
|
|
||||||
if handler := getattr(self, "output_handler", None):
|
cancel_task_threadsafe(getattr(self, "output_handler", None))
|
||||||
handler.cancel()
|
|
||||||
|
|
||||||
async def get_supported_tasks(self) -> tuple[SupportedTask, ...]:
|
async def get_supported_tasks(self) -> tuple[SupportedTask, ...]:
|
||||||
return await self.engine_core.get_supported_tasks_async()
|
return await self.engine_core.get_supported_tasks_async()
|
||||||
|
|||||||
@@ -23,7 +23,8 @@ from vllm.config import VllmConfig
|
|||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.lora.request import LoRARequest
|
from vllm.lora.request import LoRARequest
|
||||||
from vllm.tasks import SupportedTask
|
from vllm.tasks import SupportedTask
|
||||||
from vllm.utils import get_open_port, get_open_zmq_inproc_path, make_zmq_socket
|
from vllm.utils import (cancel_task_threadsafe, get_open_port,
|
||||||
|
get_open_zmq_inproc_path, make_zmq_socket)
|
||||||
from vllm.v1.engine import (EngineCoreOutputs, EngineCoreRequest,
|
from vllm.v1.engine import (EngineCoreOutputs, EngineCoreRequest,
|
||||||
EngineCoreRequestType,
|
EngineCoreRequestType,
|
||||||
ReconfigureDistributedRequest, ReconfigureRankType,
|
ReconfigureDistributedRequest, ReconfigureRankType,
|
||||||
@@ -342,10 +343,8 @@ class BackgroundResources:
|
|||||||
if self.coordinator is not None:
|
if self.coordinator is not None:
|
||||||
self.coordinator.close()
|
self.coordinator.close()
|
||||||
|
|
||||||
if self.output_queue_task is not None:
|
cancel_task_threadsafe(self.output_queue_task)
|
||||||
self.output_queue_task.cancel()
|
cancel_task_threadsafe(self.stats_update_task)
|
||||||
if self.stats_update_task is not None:
|
|
||||||
self.stats_update_task.cancel()
|
|
||||||
|
|
||||||
# ZMQ context termination can hang if the sockets
|
# ZMQ context termination can hang if the sockets
|
||||||
# aren't explicitly closed first.
|
# aren't explicitly closed first.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user