[Chore] Separate out vllm.utils.importlib (#27022)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-05-26 23:07:55 +08:00 · 2025-10-17 08:48:59 +08:00 · 2025-10-17 08:48:59 +08:00 · 4d4d6bad19
commit 4d4d6bad19
parent 11ae016bd7
41 changed files with 417 additions and 391 deletions
--- a/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
+++ b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
@ -27,7 +27,7 @@ from vllm.model_executor.model_loader.tensorizer import (
 from vllm.model_executor.model_loader.tensorizer_loader import (
    BLACKLISTED_TENSORIZER_ARGS,
 )
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 from .conftest import DummyExecutor, assert_from_collective_rpc
--- a/tests/utils_/test_import_utils.py
+++ b/tests/utils_/test_import_utils.py
@ -0,0 +1,46 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import pytest
 from vllm.utils.import_utils import PlaceholderModule
 def _raises_module_not_found():
    """Return a context manager expecting a "No module named" error."""
    expected_message = "No module named"
    return pytest.raises(ModuleNotFoundError, match=expected_message)
 def test_placeholder_module_error_handling():
    """Placeholders may be created and printed, but any real use must raise."""
    missing_module = PlaceholderModule("placeholder_1234")
    module_usages = (
        int,
        lambda obj: obj(),
        lambda obj: obj.some_attr,
        # Test conflict with internal __name attribute
        lambda obj: obj.name,
    )
    for use in module_usages:
        with _raises_module_not_found():
            use(missing_module)
    # OK to print the placeholder or use it in an f-string
    _ = repr(missing_module)
    _ = str(missing_module)
    # No error yet; only error when it is used downstream
    missing_attr = missing_module.placeholder_attr("attr")
    attr_usages = (
        int,
        lambda obj: obj(),
        lambda obj: obj.some_attr,
        # Test conflict with internal __module attribute
        lambda obj: obj.module,
    )
    for use in attr_usages:
        with _raises_module_not_found():
            use(missing_attr)
--- a/tests/utils_/test_utils.py
+++ b/tests/utils_/test_utils.py
@ -24,7 +24,6 @@ from vllm.transformers_utils.detokenizer_utils import convert_ids_list_to_tokens
 from vllm.utils import (
    FlexibleArgumentParser,
    MemorySnapshot,
    PlaceholderModule,
    bind_kv_cache,
    common_broadcastable_dtype,
    current_stream,
@ -475,46 +474,6 @@ def test_common_broadcastable_dtype(dtypes, expected_result):
    assert common_broadcastable_dtype(dtypes) == expected_result
 def test_placeholder_module_error_handling():
    placeholder = PlaceholderModule("placeholder_1234")
    def build_ctx():
        return pytest.raises(ModuleNotFoundError, match="No module named")
    with build_ctx():
        int(placeholder)
    with build_ctx():
        placeholder()
    with build_ctx():
        _ = placeholder.some_attr
    with build_ctx():
        # Test conflict with internal __name attribute
        _ = placeholder.name
    # OK to print the placeholder or use it in a f-string
    _ = repr(placeholder)
    _ = str(placeholder)
    # No error yet; only error when it is used downstream
    placeholder_attr = placeholder.placeholder_attr("attr")
    with build_ctx():
        int(placeholder_attr)
    with build_ctx():
        placeholder_attr()
    with build_ctx():
        _ = placeholder_attr.some_attr
    with build_ctx():
        # Test conflict with internal __module attribute
        _ = placeholder_attr.module
 def test_model_specification(
    parser_with_config, cli_config_file, cli_config_file_with_model
 ):
--- a/tests/v1/attention/utils.py
+++ b/tests/v1/attention/utils.py
@ -20,7 +20,7 @@ from vllm.config import (
    VllmConfig,
 )
 from vllm.config.model import ModelDType
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.v1.attention.backends.utils import (
    AttentionMetadataBuilder,
    CommonAttentionMetadata,
--- a/vllm/assets/audio.py
+++ b/vllm/assets/audio.py
@ -8,7 +8,7 @@ from urllib.parse import urljoin
 import numpy.typing as npt
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 from .base import VLLM_S3_BUCKET_URL, get_vllm_public_assets
--- a/vllm/assets/video.py
+++ b/vllm/assets/video.py
@ -10,7 +10,7 @@ import numpy.typing as npt
 from huggingface_hub import hf_hub_download
 from PIL import Image
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 from .base import get_cache_dir
--- a/vllm/attention/backends/registry.py
+++ b/vllm/attention/backends/registry.py
@ -4,7 +4,7 @@
 import enum
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils.import_utils import resolve_obj_by_qualname
 class _Backend(enum.Enum):
--- a/vllm/attention/selector.py
+++ b/vllm/attention/selector.py
@ -13,7 +13,8 @@ import vllm.envs as envs
 from vllm.attention.backends.abstract import AttentionBackend
 from vllm.attention.backends.registry import _Backend, backend_name_to_enum
 from vllm.logger import init_logger
-from vllm.utils import STR_BACKEND_ENV_VAR, resolve_obj_by_qualname
+from vllm.utils import STR_BACKEND_ENV_VAR
 from vllm.utils.import_utils import resolve_obj_by_qualname
 logger = init_logger(__name__)
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@ -39,7 +39,7 @@ from vllm.lora.utils import get_adapter_absolute_path
 from vllm.multimodal import MultiModalDataDict
 from vllm.multimodal.image import convert_image_mode
 from vllm.transformers_utils.tokenizer import AnyTokenizer
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 try:
    from datasets import load_dataset
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@ -24,7 +24,8 @@ from vllm.compilation.partition_rules import (
 from vllm.config import CompilationConfig, CUDAGraphMode, VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname
+from vllm.utils import is_torch_equal_or_newer
 from vllm.utils.import_utils import resolve_obj_by_qualname
 from .caching import VllmSerializableFunction
 from .compiler_interface import (
--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@ -21,7 +21,8 @@ from vllm.compilation.wrapper import TorchCompileWrapperWithCustomDispatcher
 from vllm.config import CompilationMode, VllmConfig, set_current_vllm_config
 from vllm.logger import init_logger
 from vllm.sequence import IntermediateTensors
-from vllm.utils import resolve_obj_by_qualname, supports_dynamo
+from vllm.utils import supports_dynamo
 from vllm.utils.import_utils import resolve_obj_by_qualname
 from .monitor import start_monitoring_torch_compile
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@ -16,7 +16,8 @@ from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
 from vllm.config.utils import config
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname
+from vllm.utils import is_torch_equal_or_newer
 from vllm.utils.import_utils import resolve_obj_by_qualname
 if TYPE_CHECKING:
    from vllm.config import VllmConfig
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@ -41,7 +41,8 @@ from vllm.transformers_utils.config import (
 )
 from vllm.transformers_utils.runai_utils import ObjectStorageModel, is_runai_obj_uri
 from vllm.transformers_utils.utils import maybe_model_redirect
-from vllm.utils import LayerBlockType, LazyLoader, common_broadcastable_dtype
+from vllm.utils import LayerBlockType, common_broadcastable_dtype
 from vllm.utils.import_utils import LazyLoader
 if TYPE_CHECKING:
    from transformers import PretrainedConfig
--- a/vllm/config/speculative.py
+++ b/vllm/config/speculative.py
@ -13,7 +13,7 @@ import vllm.envs as envs
 from vllm.config.parallel import ParallelConfig
 from vllm.config.utils import config
 from vllm.logger import init_logger
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 if TYPE_CHECKING:
    from transformers import PretrainedConfig
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@ -52,9 +52,9 @@ from vllm.logger import init_logger
 from vllm.utils import (
    direct_register_custom_op,
    get_distributed_init_method,
    resolve_obj_by_qualname,
    supports_custom_op,
 )
 from vllm.utils.import_utils import resolve_obj_by_qualname
@dataclass
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@ -81,7 +81,8 @@ from vllm.sampling_params import (
    SamplingParams,
    StructuredOutputsParams,
 )
-from vllm.utils import random_uuid, resolve_obj_by_qualname
+from vllm.utils import random_uuid
 from vllm.utils.import_utils import resolve_obj_by_qualname
 EMBED_DTYPE_TO_TORCH_DTYPE = {
    "float32": torch.float32,
--- a/vllm/entrypoints/openai/speech_to_text.py
+++ b/vllm/entrypoints/openai/speech_to_text.py
@ -32,7 +32,7 @@ from vllm.inputs.data import PromptType
 from vllm.logger import init_logger
 from vllm.model_executor.models import SupportsTranscription
 from vllm.outputs import RequestOutput
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 try:
    import librosa
--- a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
@ -12,8 +12,8 @@ from vllm.entrypoints.openai.protocol import (
 )
 from vllm.logger import init_logger
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.utils import import_from_path
 from vllm.utils.collections import is_list_of
 from vllm.utils.import_utils import import_from_path
 logger = init_logger(__name__)
--- a/vllm/lora/punica_wrapper/punica_selector.py
+++ b/vllm/lora/punica_wrapper/punica_selector.py
@ -3,7 +3,7 @@
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils.import_utils import resolve_obj_by_qualname
 from .punica_base import PunicaWrapperBase
--- a/vllm/model_executor/layers/pooler.py
+++ b/vllm/model_executor/layers/pooler.py
@ -17,7 +17,7 @@ from vllm.logger import init_logger
 from vllm.model_executor.models.adapters import _load_st_projector
 from vllm.pooling_params import PoolingParams
 from vllm.tasks import PoolingTask
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.v1.outputs import PoolerOutput
 from vllm.v1.pool.metadata import PoolingCursor, PoolingMetadata
--- a/vllm/model_executor/model_loader/tensorizer.py
+++ b/vllm/model_executor/model_loader/tensorizer.py
@ -26,7 +26,8 @@ from vllm.config import ModelConfig, ParallelConfig, VllmConfig, set_current_vll
 from vllm.logger import init_logger
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.platforms import current_platform
-from vllm.utils import FlexibleArgumentParser, PlaceholderModule
+from vllm.utils import FlexibleArgumentParser
 from vllm.utils.import_utils import PlaceholderModule
 if TYPE_CHECKING:
    from vllm.engine.arg_utils import EngineArgs
--- a/vllm/model_executor/model_loader/weight_utils.py
+++ b/vllm/model_executor/model_loader/weight_utils.py
@ -34,7 +34,7 @@ from vllm.model_executor.layers.quantization import (
    get_quantization_config,
 )
 from vllm.platforms import current_platform
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 try:
    from runai_model_streamer import SafetensorsStreamer
--- a/vllm/multimodal/audio.py
+++ b/vllm/multimodal/audio.py
@ -8,7 +8,7 @@ from typing import Literal
 import numpy as np
 import numpy.typing as npt
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 from .base import MediaIO
--- a/vllm/multimodal/inputs.py
+++ b/vllm/multimodal/inputs.py
@ -22,8 +22,8 @@ from typing import (
 import numpy as np
 from typing_extensions import NotRequired, TypeVar, deprecated
 from vllm.utils import LazyLoader
 from vllm.utils.collections import full_groupby, is_list_of
 from vllm.utils.import_utils import LazyLoader
 from vllm.utils.jsontree import json_map_leaves
 if TYPE_CHECKING:
--- a/vllm/multimodal/parse.py
+++ b/vllm/multimodal/parse.py
@ -19,8 +19,8 @@ import numpy as np
 import torch
 from typing_extensions import assert_never
 from vllm.utils import LazyLoader
 from vllm.utils.collections import is_list_of
 from vllm.utils.import_utils import LazyLoader
 from .audio import AudioResampler
 from .inputs import (
--- a/vllm/platforms/init.py
+++ b/vllm/platforms/init.py
@ -7,7 +7,8 @@ from typing import TYPE_CHECKING
 from vllm import envs
 from vllm.plugins import PLATFORM_PLUGINS_GROUP, load_plugins_by_group
-from vllm.utils import resolve_obj_by_qualname, supports_xccl
+from vllm.utils import supports_xccl
 from vllm.utils.import_utils import resolve_obj_by_qualname
 from .interface import CpuArchEnum, Platform, PlatformEnum
--- a/vllm/plugins/io_processors/init.py
+++ b/vllm/plugins/io_processors/init.py
@ -6,7 +6,7 @@ import logging
 from vllm.config import VllmConfig
 from vllm.plugins import IO_PROCESSOR_PLUGINS_GROUP, load_plugins_by_group
 from vllm.plugins.io_processors.interface import IOProcessor
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils.import_utils import resolve_obj_by_qualname
 logger = logging.getLogger(__name__)
--- a/vllm/reasoning/abs_reasoning_parsers.py
+++ b/vllm/reasoning/abs_reasoning_parsers.py
@ -8,8 +8,8 @@ from functools import cached_property
 from typing import TYPE_CHECKING, Any
 from vllm.logger import init_logger
 from vllm.utils import import_from_path
 from vllm.utils.collections import is_list_of
 from vllm.utils.import_utils import import_from_path
 if TYPE_CHECKING:
    from vllm.entrypoints.openai.protocol import (
--- a/vllm/transformers_utils/runai_utils.py
+++ b/vllm/transformers_utils/runai_utils.py
@ -9,7 +9,7 @@ import signal
 from vllm import envs
 from vllm.assets.base import get_cache_dir
 from vllm.logger import init_logger
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 logger = init_logger(__name__)
--- a/vllm/transformers_utils/s3_utils.py
+++ b/vllm/transformers_utils/s3_utils.py
@ -4,7 +4,7 @@
 import fnmatch
 from typing import TYPE_CHECKING, Optional
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 if TYPE_CHECKING:
    from botocore.client import BaseClient
--- a/vllm/utils/init.py
+++ b/vllm/utils/init.py
@ -8,8 +8,6 @@ import gc
 import getpass
 import hashlib
 import importlib
 import importlib.metadata
 import importlib.util
 import inspect
 import ipaddress
 import json
@ -25,7 +23,6 @@ import textwrap
 import threading
 import time
 import traceback
 import types
 import uuid
 import warnings
 import weakref
@ -68,7 +65,6 @@ import zmq.asyncio
 from packaging import version
 from packaging.version import Version
 from torch.library import Library
 from typing_extensions import Never
 import vllm.envs as envs
 from vllm.logger import enable_trace_function_call, init_logger
@ -801,8 +797,6 @@ def find_nccl_include_paths() -> list[str] | None:
        paths.append(inc)
    try:
        import importlib.util
        spec = importlib.util.find_spec("nvidia.nccl")
        if spec and getattr(spec, "submodule_search_locations", None):
            for loc in spec.submodule_search_locations:
@ -1560,253 +1554,6 @@ def get_cuda_view_from_cpu_tensor(cpu_tensor: torch.Tensor) -> torch.Tensor:
    return torch.ops._C.get_cuda_view_from_cpu_tensor(cpu_tensor)
 def import_from_path(module_name: str, file_path: str | os.PathLike):
    """
    Import a Python file according to its file path.
    Based on the official recipe:
    https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
    """
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None:
        raise ModuleNotFoundError(f"No module named '{module_name}'")
    assert spec.loader is not None
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module
@cache
 def get_vllm_optional_dependencies():
    metadata = importlib.metadata.metadata("vllm")
    requirements = metadata.get_all("Requires-Dist", [])
    extras = metadata.get_all("Provides-Extra", [])
    return {
        extra: [
            re.split(r";|>=|<=|==", req)[0]
            for req in requirements
            if req.endswith(f'extra == "{extra}"')
        ]
        for extra in extras
    }
 class _PlaceholderBase:
    """
    Disallows downstream usage of placeholder modules.
    We need to explicitly override each dunder method because
    [`__getattr__`][vllm.utils._PlaceholderBase.__getattr__]
    is not called when they are accessed.
    Info:
        [Special method lookup](https://docs.python.org/3/reference/datamodel.html#special-lookup)
    """
    def __getattr__(self, key: str) -> Never:
        """
        The main class should implement this to throw an error
        for attribute accesses representing downstream usage.
        """
        raise NotImplementedError
    # [Basic customization]
    def __lt__(self, other: object):
        return self.__getattr__("__lt__")
    def __le__(self, other: object):
        return self.__getattr__("__le__")
    def __eq__(self, other: object):
        return self.__getattr__("__eq__")
    def __ne__(self, other: object):
        return self.__getattr__("__ne__")
    def __gt__(self, other: object):
        return self.__getattr__("__gt__")
    def __ge__(self, other: object):
        return self.__getattr__("__ge__")
    def __hash__(self):
        return self.__getattr__("__hash__")
    def __bool__(self):
        return self.__getattr__("__bool__")
    # [Callable objects]
    def __call__(self, *args: object, **kwargs: object):
        return self.__getattr__("__call__")
    # [Container types]
    def __len__(self):
        return self.__getattr__("__len__")
    def __getitem__(self, key: object):
        return self.__getattr__("__getitem__")
    def __setitem__(self, key: object, value: object):
        return self.__getattr__("__setitem__")
    def __delitem__(self, key: object):
        return self.__getattr__("__delitem__")
    # __missing__ is optional according to __getitem__ specification,
    # so it is skipped
    # __iter__ and __reversed__ have a default implementation
    # based on __len__ and __getitem__, so they are skipped.
    # [Numeric Types]
    def __add__(self, other: object):
        return self.__getattr__("__add__")
    def __sub__(self, other: object):
        return self.__getattr__("__sub__")
    def __mul__(self, other: object):
        return self.__getattr__("__mul__")
    def __matmul__(self, other: object):
        return self.__getattr__("__matmul__")
    def __truediv__(self, other: object):
        return self.__getattr__("__truediv__")
    def __floordiv__(self, other: object):
        return self.__getattr__("__floordiv__")
    def __mod__(self, other: object):
        return self.__getattr__("__mod__")
    def __divmod__(self, other: object):
        return self.__getattr__("__divmod__")
    def __pow__(self, other: object, modulo: object = ...):
        return self.__getattr__("__pow__")
    def __lshift__(self, other: object):
        return self.__getattr__("__lshift__")
    def __rshift__(self, other: object):
        return self.__getattr__("__rshift__")
    def __and__(self, other: object):
        return self.__getattr__("__and__")
    def __xor__(self, other: object):
        return self.__getattr__("__xor__")
    def __or__(self, other: object):
        return self.__getattr__("__or__")
    # r* and i* methods have lower priority than
    # the methods for left operand so they are skipped
    def __neg__(self):
        return self.__getattr__("__neg__")
    def __pos__(self):
        return self.__getattr__("__pos__")
    def __abs__(self):
        return self.__getattr__("__abs__")
    def __invert__(self):
        return self.__getattr__("__invert__")
    # __complex__, __int__ and __float__ have a default implementation
    # based on __index__, so they are skipped.
    def __index__(self):
        return self.__getattr__("__index__")
    def __round__(self, ndigits: object = ...):
        return self.__getattr__("__round__")
    def __trunc__(self):
        return self.__getattr__("__trunc__")
    def __floor__(self):
        return self.__getattr__("__floor__")
    def __ceil__(self):
        return self.__getattr__("__ceil__")
    # [Context managers]
    def __enter__(self):
        return self.__getattr__("__enter__")
    def __exit__(self, *args: object, **kwargs: object):
        return self.__getattr__("__exit__")
 class PlaceholderModule(_PlaceholderBase):
    """
    A placeholder object to use when a module does not exist.
    This enables more informative errors when trying to access attributes
    of a module that does not exist.
    """
    def __init__(self, name: str) -> None:
        super().__init__()
        # Apply name mangling to avoid conflicting with module attributes
        self.__name = name
    def placeholder_attr(self, attr_path: str):
        return _PlaceholderModuleAttr(self, attr_path)
    def __getattr__(self, key: str):
        name = self.__name
        try:
            importlib.import_module(name)
        except ImportError as exc:
            for extra, names in get_vllm_optional_dependencies().items():
                if name in names:
                    msg = f"Please install vllm[{extra}] for {extra} support"
                    raise ImportError(msg) from exc
            raise exc
        raise AssertionError(
            "PlaceholderModule should not be used "
            "when the original module can be imported"
        )
 class _PlaceholderModuleAttr(_PlaceholderBase):
    def __init__(self, module: PlaceholderModule, attr_path: str) -> None:
        super().__init__()
        # Apply name mangling to avoid conflicting with module attributes
        self.__module = module
        self.__attr_path = attr_path
    def placeholder_attr(self, attr_path: str):
        return _PlaceholderModuleAttr(self.__module, f"{self.__attr_path}.{attr_path}")
    def __getattr__(self, key: str):
        getattr(self.__module, f"{self.__attr_path}.{key}")
        raise AssertionError(
            "PlaceholderModule should not be used "
            "when the original module can be imported"
        )
 # create a library to hold the custom op
 vllm_lib = Library("vllm", "FRAGMENT")  # noqa
@ -1871,15 +1618,6 @@ def direct_register_custom_op(
        my_lib._register_fake(op_name, fake_impl)
 def resolve_obj_by_qualname(qualname: str) -> Any:
    """
    Resolve an object by its fully-qualified class name.
    """
    module_name, obj_name = qualname.rsplit(".", 1)
    module = importlib.import_module(module_name)
    return getattr(module, obj_name)
 def kill_process_tree(pid: int):
    """
    Kills all descendant processes of the given pid by sending SIGKILL.
@ -2427,57 +2165,6 @@ def warn_for_unimplemented_methods(cls: type[T]) -> type[T]:
    return cls
 class LazyLoader(types.ModuleType):
    """
    LazyLoader module borrowed from Tensorflow
    https://github.com/tensorflow/tensorflow/blob/main/tensorflow/python/util/lazy_loader.py
    with an addition of "module caching".
    Lazily import a module, mainly to avoid pulling in large dependencies.
    Modules such as `xgrammar` might do additional side effects, so we
    only want to use this when it is needed, delaying all eager effects
    """
    def __init__(
        self,
        local_name: str,
        parent_module_globals: dict[str, Any],
        name: str,
    ):
        self._local_name = local_name
        self._parent_module_globals = parent_module_globals
        self._module: types.ModuleType | None = None
        super().__init__(str(name))
    def _load(self) -> types.ModuleType:
        # Import the target module and insert it into the parent's namespace
        try:
            module = importlib.import_module(self.__name__)
            self._parent_module_globals[self._local_name] = module
            # The additional add to sys.modules
            # ensures library is actually loaded.
            sys.modules[self._local_name] = module
        except ModuleNotFoundError as err:
            raise err from None
        # Update this object's dict so that if someone keeps a
        # reference to the LazyLoader, lookups are efficient
        # (__getattr__ is only called on lookups that fail).
        self.__dict__.update(module.__dict__)
        return module
    def __getattr__(self, item: Any) -> Any:
        if self._module is None:
            self._module = self._load()
        return getattr(self._module, item)
    def __dir__(self) -> list[str]:
        if self._module is None:
            self._module = self._load()
        return dir(self._module)
@contextlib.contextmanager
 def cprofile_context(save_file: str | None = None):
    """Run a cprofile
--- a/vllm/utils/import_utils.py
+++ b/vllm/utils/import_utils.py
@ -0,0 +1,326 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """
 Contains helpers related to importing modules.
 This is similar in concept to the `importlib` module.
 """
 import importlib.metadata
 import importlib.util
 import os
 import sys
 from functools import cache
 from types import ModuleType
 from typing import Any
 import regex as re
 from typing_extensions import Never
 def import_from_path(module_name: str, file_path: str | os.PathLike):
    """
    Import a Python file according to its file path.
    Based on the official recipe:
    https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
    """
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None:
        raise ModuleNotFoundError(f"No module named {module_name!r}")
    assert spec.loader is not None
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module
 def resolve_obj_by_qualname(qualname: str) -> Any:
    """
    Resolve an object by its fully-qualified name.
    The text before the last dot is imported as a module and the text after
    it is looked up as an attribute of that module, so any module-level
    object (not only classes) can be resolved.
    Args:
        qualname: Dotted path of the form `package.module.attr`.
    Returns:
        The attribute named by the last component of `qualname`.
    Raises:
        ValueError: If `qualname` contains no dot, i.e. has no module part.
    """
    module_name, separator, obj_name = qualname.rpartition(".")
    if not separator:
        # Previously this surfaced as an opaque tuple-unpacking ValueError.
        raise ValueError(
            f"Expected a fully-qualified name like 'module.attr', got {qualname!r}"
        )
    module = importlib.import_module(module_name)
    return getattr(module, obj_name)
@cache
def get_vllm_optional_dependencies():
    """
    Map each extra declared in vLLM's package metadata to its requirements.

    Returns:
        A dict keyed by extra name; each value lists the leading part of every
        `Requires-Dist` entry (up to the first `;`, `>=`, `<=` or `==`) whose
        text ends with the marker `extra == "<name>"`.
    """
    dist_metadata = importlib.metadata.metadata("vllm")
    all_requirements = dist_metadata.get_all("Requires-Dist", [])
    deps_by_extra = {}
    for extra in dist_metadata.get_all("Provides-Extra", []):
        marker_suffix = f'extra == "{extra}"'
        deps_by_extra[extra] = [
            re.split(r";|>=|<=|==", requirement)[0]
            for requirement in all_requirements
            if requirement.endswith(marker_suffix)
        ]
    return deps_by_extra
 class _PlaceholderBase:
    """
    Disallows downstream usage of placeholder modules.
    We need to explicitly override each dunder method because
    [`__getattr__`][vllm.utils.import_utils._PlaceholderBase.__getattr__]
    is not called when they are accessed.
    Every override below simply forwards to `__getattr__`, passing the
    dunder's own name, so that whatever error the subclass raises there
    also surfaces when the placeholder is used via an operator or builtin.
    Info:
        [Special method lookup](https://docs.python.org/3/reference/datamodel.html#special-lookup)
    """
    def __getattr__(self, key: str) -> Never:
        """
        The main class should implement this to throw an error
        for attribute accesses representing downstream usage.
        This base implementation only raises `NotImplementedError`.
        """
        raise NotImplementedError
    # [Basic customization]
    # Comparison, hashing and truthiness all count as downstream usage.
    def __lt__(self, other: object):
        return self.__getattr__("__lt__")
    def __le__(self, other: object):
        return self.__getattr__("__le__")
    def __eq__(self, other: object):
        return self.__getattr__("__eq__")
    def __ne__(self, other: object):
        return self.__getattr__("__ne__")
    def __gt__(self, other: object):
        return self.__getattr__("__gt__")
    def __ge__(self, other: object):
        return self.__getattr__("__ge__")
    def __hash__(self):
        return self.__getattr__("__hash__")
    def __bool__(self):
        return self.__getattr__("__bool__")
    # [Callable objects]
    # Arguments are accepted but ignored; only the method name is forwarded.
    def __call__(self, *args: object, **kwargs: object):
        return self.__getattr__("__call__")
    # [Container types]
    def __len__(self):
        return self.__getattr__("__len__")
    def __getitem__(self, key: object):
        return self.__getattr__("__getitem__")
    def __setitem__(self, key: object, value: object):
        return self.__getattr__("__setitem__")
    def __delitem__(self, key: object):
        return self.__getattr__("__delitem__")
    # __missing__ is optional according to __getitem__ specification,
    # so it is skipped
    # __iter__ and __reversed__ have a default implementation
    # based on __len__ and __getitem__, so they are skipped.
    # [Numeric Types]
    def __add__(self, other: object):
        return self.__getattr__("__add__")
    def __sub__(self, other: object):
        return self.__getattr__("__sub__")
    def __mul__(self, other: object):
        return self.__getattr__("__mul__")
    def __matmul__(self, other: object):
        return self.__getattr__("__matmul__")
    def __truediv__(self, other: object):
        return self.__getattr__("__truediv__")
    def __floordiv__(self, other: object):
        return self.__getattr__("__floordiv__")
    def __mod__(self, other: object):
        return self.__getattr__("__mod__")
    def __divmod__(self, other: object):
        return self.__getattr__("__divmod__")
    def __pow__(self, other: object, modulo: object = ...):
        return self.__getattr__("__pow__")
    def __lshift__(self, other: object):
        return self.__getattr__("__lshift__")
    def __rshift__(self, other: object):
        return self.__getattr__("__rshift__")
    def __and__(self, other: object):
        return self.__getattr__("__and__")
    def __xor__(self, other: object):
        return self.__getattr__("__xor__")
    def __or__(self, other: object):
        return self.__getattr__("__or__")
    # r* and i* methods have lower priority than
    # the methods for left operand so they are skipped
    # Unary operators
    def __neg__(self):
        return self.__getattr__("__neg__")
    def __pos__(self):
        return self.__getattr__("__pos__")
    def __abs__(self):
        return self.__getattr__("__abs__")
    def __invert__(self):
        return self.__getattr__("__invert__")
    # __complex__, __int__ and __float__ have a default implementation
    # based on __index__, so they are skipped.
    def __index__(self):
        return self.__getattr__("__index__")
    # Rounding helpers
    def __round__(self, ndigits: object = ...):
        return self.__getattr__("__round__")
    def __trunc__(self):
        return self.__getattr__("__trunc__")
    def __floor__(self):
        return self.__getattr__("__floor__")
    def __ceil__(self):
        return self.__getattr__("__ceil__")
    # [Context managers]
    def __enter__(self):
        return self.__getattr__("__enter__")
    def __exit__(self, *args: object, **kwargs: object):
        return self.__getattr__("__exit__")
 class PlaceholderModule(_PlaceholderBase):
    """
    Stand-in object for a module that could not be imported.
    Creating and passing the placeholder around is harmless; accessing any
    attribute on it re-attempts the import so that a meaningful import error
    is raised at the point of use.
    """
    def __init__(self, name: str) -> None:
        super().__init__()
        # Name mangling keeps this from clashing with module attributes
        self.__name = name
    def placeholder_attr(self, attr_path: str):
        """Return a placeholder for the attribute `attr_path` of this module."""
        return _PlaceholderModuleAttr(self, attr_path)
    def __getattr__(self, key: str) -> Never:
        """Retry the import and raise the resulting (or a friendlier) error."""
        module_name = self.__name
        try:
            importlib.import_module(module_name)
        except ImportError as import_error:
            optional_deps = get_vllm_optional_dependencies()
            for extra, packages in optional_deps.items():
                if module_name not in packages:
                    continue
                # The module belongs to a vLLM extra: point the user at it
                msg = f"Please install vllm[{extra}] for {extra} support"
                raise ImportError(msg) from import_error
            raise import_error
        else:
            raise AssertionError(
                "PlaceholderModule should not be used "
                "when the original module can be imported"
            )
 class _PlaceholderModuleAttr(_PlaceholderBase):
    """Stand-in for a (possibly nested) attribute of a `PlaceholderModule`."""
    def __init__(self, module: PlaceholderModule, attr_path: str) -> None:
        super().__init__()
        # Name mangling keeps these from clashing with module attributes
        self.__module = module
        self.__attr_path = attr_path
    def placeholder_attr(self, attr_path: str):
        """Return a placeholder one level deeper along the attribute path."""
        nested_path = f"{self.__attr_path}.{attr_path}"
        return _PlaceholderModuleAttr(self.__module, nested_path)
    def __getattr__(self, key: str) -> Never:
        """Delegate to the owning placeholder module, which raises."""
        qualified_key = f"{self.__attr_path}.{key}"
        getattr(self.__module, qualified_key)
        # Only reached if the lookup above unexpectedly did not raise
        raise AssertionError(
            "PlaceholderModule should not be used "
            "when the original module can be imported"
        )
 class LazyLoader(ModuleType):
    """
    `LazyLoader` module borrowed from [Tensorflow]
    (https://github.com/tensorflow/tensorflow/blob/main/tensorflow/python/util/lazy_loader.py)
    with an addition of "module caching".
    The real module is imported on the first attribute lookup that misses
    (or the first `dir()` call), which avoids pulling in large dependencies
    up front. Modules such as `xgrammar` might do additional side effects,
    so we only want to import them when needed, delaying all eager effects.
    """
    def __init__(
        self,
        local_name: str,
        parent_module_globals: dict[str, Any],
        name: str,
    ):
        self._local_name = local_name
        self._parent_module_globals = parent_module_globals
        # Cache for the real module; stays None until the first load
        self._module: ModuleType | None = None
        super().__init__(str(name))
    def _load(self) -> ModuleType:
        """Import the target module and publish it under the local name."""
        try:
            loaded = importlib.import_module(self.__name__)
            # Replace the lazy stand-in inside the parent's namespace
            self._parent_module_globals[self._local_name] = loaded
            # The additional add to sys.modules
            # ensures library is actually loaded.
            sys.modules[self._local_name] = loaded
        except ModuleNotFoundError as missing:
            raise missing from None
        # Mirror the module's namespace onto this object so that anyone still
        # holding the LazyLoader resolves names directly
        # (__getattr__ is only called on lookups that fail).
        self.__dict__.update(loaded.__dict__)
        return loaded
    def __getattr__(self, item: Any) -> Any:
        target = self._module
        if target is None:
            target = self._module = self._load()
        return getattr(target, item)
    def __dir__(self) -> list[str]:
        target = self._module
        if target is None:
            target = self._module = self._load()
        return dir(target)
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@ -32,10 +32,10 @@ from vllm.utils import (
    decorate_logs,
    get_hash_fn_by_name,
    make_zmq_socket,
    resolve_obj_by_qualname,
    set_process_title,
 )
 from vllm.utils.gc_utils import maybe_attach_gc_debug_callback
 from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.v1.core.kv_cache_utils import (
    BlockHash,
    generate_scheduler_kv_cache_config,
--- a/vllm/v1/executor/abstract.py
+++ b/vllm/v1/executor/abstract.py
@ -14,7 +14,7 @@ from vllm.executor.uniproc_executor import (  # noqa
    ExecutorWithExternalLauncher as ExecutorWithExternalLauncherV0,
 )
 from vllm.executor.uniproc_executor import UniProcExecutor as UniProcExecutorV0  # noqa
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
 from vllm.v1.outputs import DraftTokenIds, ModelRunnerOutput
--- a/vllm/v1/structured_output/init.py
+++ b/vllm/v1/structured_output/init.py
@ -8,7 +8,7 @@ from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.reasoning import ReasoningParserManager
 from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 from vllm.v1.structured_output.backend_guidance import GuidanceBackend
 from vllm.v1.structured_output.backend_types import (
    StructuredOutputBackend,
--- a/vllm/v1/structured_output/backend_guidance.py
+++ b/vllm/v1/structured_output/backend_guidance.py
@ -11,7 +11,7 @@ import torch
 from vllm.logger import init_logger
 from vllm.sampling_params import SamplingParams
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 from vllm.v1.structured_output.backend_types import (
    StructuredOutputBackend,
    StructuredOutputGrammar,
--- a/vllm/v1/structured_output/backend_lm_format_enforcer.py
+++ b/vllm/v1/structured_output/backend_lm_format_enforcer.py
@ -10,7 +10,7 @@ import torch
 from transformers import PreTrainedTokenizerBase
 from vllm.sampling_params import SamplingParams
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 from vllm.v1.structured_output.backend_types import (
    StructuredOutputBackend,
    StructuredOutputGrammar,
--- a/vllm/v1/structured_output/backend_outlines.py
+++ b/vllm/v1/structured_output/backend_outlines.py
@ -12,7 +12,7 @@ import torch
 from regex import escape as regex_escape
 from vllm.sampling_params import SamplingParams
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 from vllm.v1.structured_output.backend_types import (
    StructuredOutputBackend,
    StructuredOutputGrammar,
--- a/vllm/v1/structured_output/backend_xgrammar.py
+++ b/vllm/v1/structured_output/backend_xgrammar.py
@ -11,7 +11,7 @@ import vllm.envs
 from vllm.logger import init_logger
 from vllm.sampling_params import SamplingParams
 from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 from vllm.v1.structured_output.backend_types import (
    StructuredOutputBackend,
    StructuredOutputGrammar,
--- a/vllm/v1/structured_output/utils.py
+++ b/vllm/v1/structured_output/utils.py
@ -13,7 +13,7 @@ from diskcache import Cache
 import vllm.envs as envs
 from vllm.logger import init_logger
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 if TYPE_CHECKING:
    import outlines_core as oc
--- a/vllm/v1/worker/worker_base.py
+++ b/vllm/v1/worker/worker_base.py
@ -15,11 +15,11 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.cache import worker_receiver_cache_from_config
 from vllm.utils import (
    enable_trace_function_call_for_thread,
    resolve_obj_by_qualname,
    run_method,
    update_environment_variables,
    warn_for_unimplemented_methods,
 )
 from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.v1.kv_cache_interface import KVCacheSpec
 if TYPE_CHECKING: