diff --git a/vllm/envs.py b/vllm/envs.py index e7796aa73df4..145ec3495a0c 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -63,6 +63,7 @@ if TYPE_CHECKING: VLLM_IMAGE_FETCH_TIMEOUT: int = 5 VLLM_VIDEO_FETCH_TIMEOUT: int = 30 VLLM_AUDIO_FETCH_TIMEOUT: int = 10 + VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8 VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25 VLLM_VIDEO_LOADER_BACKEND: str = "opencv" VLLM_MM_INPUT_CACHE_GIB: int = 4 @@ -555,6 +556,12 @@ environment_variables: dict[str, Callable[[], Any]] = { "VLLM_AUDIO_FETCH_TIMEOUT": lambda: int(os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10")), + # Max number of workers for the thread pool handling + # media bytes loading. Set to 1 to disable parallel processing. + # Default is 8 + "VLLM_MEDIA_LOADING_THREAD_COUNT": + lambda: int(os.getenv("VLLM_MEDIA_LOADING_THREAD_COUNT", "8")), + # Maximum filesize in MB for a single audio file when processing # speech-to-text requests. Files larger than this will be rejected. # Default is 25 MB diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py index 8dfbc6503520..b8266fd350f5 100644 --- a/vllm/multimodal/utils.py +++ b/vllm/multimodal/utils.py @@ -1,6 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import asyncio +import atexit +from concurrent.futures import ThreadPoolExecutor from itertools import groupby from pathlib import Path from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union @@ -33,6 +36,10 @@ else: MultiModalKwargs = Any MultiModalPlaceholderDict = Any +global_thread_pool = ThreadPoolExecutor( + max_workers=envs.VLLM_MEDIA_LOADING_THREAD_COUNT) +atexit.register(global_thread_pool.shutdown) + class MediaConnector: @@ -139,19 +146,26 @@ class MediaConnector: fetch_timeout: Optional[int] = None, ) -> _M: url_spec = urlparse(url) + loop = asyncio.get_running_loop() if url_spec.scheme.startswith("http"): connection = self.connection data = await connection.async_get_bytes(url, timeout=fetch_timeout) - - return media_io.load_bytes(data) + future = loop.run_in_executor(global_thread_pool, + media_io.load_bytes, data) + return await future if url_spec.scheme == "data": - return self._load_data_url(url_spec, media_io) + future = loop.run_in_executor(global_thread_pool, + self._load_data_url, url_spec, + media_io) + return await future if url_spec.scheme == "file": - return self._load_file_url(url_spec, media_io) - + future = loop.run_in_executor(global_thread_pool, + self._load_file_url, url_spec, + media_io) + return await future msg = "The URL must be either a HTTP, data or file URL." raise ValueError(msg) @@ -489,4 +503,4 @@ def fetch_video( "video": video_io_kwargs } media_connector = MediaConnector(media_io_kwargs=media_io_kwargs) - return media_connector.fetch_video(video_url) \ No newline at end of file + return media_connector.fetch_video(video_url)