[Chore] Separate out system utilities from vllm.utils (#27201)

Signed-off-by: dongbo910220 <1275604947@qq.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
2025-12-09 02:34:56 +08:00 · 2025-10-23 04:25:25 +08:00 · 2025-10-23 04:25:25 +08:00 · a0003b56b0
commit a0003b56b0
parent 5beacce2ea
25 changed files with 199 additions and 155 deletions
--- a/tests/compile/test_async_tp.py
+++ b/tests/compile/test_async_tp.py
@ -25,7 +25,7 @@ from vllm.distributed.parallel_state import (
    initialize_model_parallel,
 )
 from vllm.platforms import current_platform
-from vllm.utils import update_environment_variables
+from vllm.utils.system_utils import update_environment_variables

 from ..models.registry import HF_EXAMPLE_MODELS
 from ..utils import (
--- a/tests/compile/test_fusion_all_reduce.py
+++ b/tests/compile/test_fusion_all_reduce.py
@ -31,7 +31,7 @@ from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
    GroupShape,
 )
 from vllm.platforms import current_platform
-from vllm.utils import update_environment_variables
+from vllm.utils.system_utils import update_environment_variables

 from ..utils import has_module_attribute, multi_gpu_test
 from .backend import TestBackend
--- a/tests/compile/test_sequence_parallelism.py
+++ b/tests/compile/test_sequence_parallelism.py
@ -29,7 +29,7 @@ from vllm.distributed.parallel_state import (
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import Fp8LinearOp
 from vllm.platforms import current_platform
-from vllm.utils import update_environment_variables
+from vllm.utils.system_utils import update_environment_variables

 from ..utils import multi_gpu_test
 from .backend import TestBackend
--- a/tests/distributed/test_eplb_execute.py
+++ b/tests/distributed/test_eplb_execute.py
@ -15,7 +15,7 @@ from vllm.distributed.parallel_state import (
    get_tp_group,
    init_distributed_environment,
 )
-from vllm.utils import update_environment_variables
+from vllm.utils.system_utils import update_environment_variables


 def distributed_run(fn, world_size):
--- a/tests/distributed/test_nccl_symm_mem_allreduce.py
+++ b/tests/distributed/test_nccl_symm_mem_allreduce.py
@ -23,7 +23,7 @@ from vllm.distributed.parallel_state import (
    initialize_model_parallel,
 )
 from vllm.platforms import current_platform
-from vllm.utils import update_environment_variables
+from vllm.utils.system_utils import update_environment_variables

 torch.manual_seed(42)
 random.seed(44)
--- a/tests/distributed/test_pynccl.py
+++ b/tests/distributed/test_pynccl.py
@ -18,7 +18,7 @@ from vllm.distributed.parallel_state import (
    graph_capture,
    init_distributed_environment,
 )
-from vllm.utils import update_environment_variables
+from vllm.utils.system_utils import update_environment_variables


 def distributed_run(fn, world_size):
--- a/tests/distributed/test_shm_broadcast.py
+++ b/tests/distributed/test_shm_broadcast.py
@ -10,8 +10,8 @@ import torch.distributed as dist

 from vllm.distributed.device_communicators.shm_broadcast import MessageQueue
 from vllm.distributed.utils import StatelessProcessGroup
-from vllm.utils import update_environment_variables
 from vllm.utils.network_utils import get_open_port
+from vllm.utils.system_utils import update_environment_variables


 def get_arrays(n: int, seed: int = 0) -> list[np.ndarray]:
--- a/tests/distributed/test_symm_mem_allreduce.py
+++ b/tests/distributed/test_symm_mem_allreduce.py
@ -23,7 +23,7 @@ from vllm.distributed.parallel_state import (
 from vllm.engine.arg_utils import EngineArgs
 from vllm.engine.llm_engine import LLMEngine
 from vllm.platforms import current_platform
-from vllm.utils import update_environment_variables
+from vllm.utils.system_utils import update_environment_variables

 torch.manual_seed(42)
 random.seed(44)
--- a/tests/distributed/test_utils.py
+++ b/tests/distributed/test_utils.py
@ -10,8 +10,8 @@ import torch
 import vllm.envs as envs
 from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator
 from vllm.distributed.utils import StatelessProcessGroup
-from vllm.utils import update_environment_variables
 from vllm.utils.network_utils import get_open_port
+from vllm.utils.system_utils import update_environment_variables
 from vllm.utils.torch_utils import cuda_device_count_stateless

 from ..utils import multi_gpu_test
--- a/tests/kernels/mamba/test_mamba_mixer2.py
+++ b/tests/kernels/mamba/test_mamba_mixer2.py
@ -13,7 +13,7 @@ from vllm.distributed.parallel_state import (
 )
 from vllm.model_executor.layers.mamba.mamba_mixer2 import Mixer2RMSNormGated
 from vllm.platforms import current_platform
-from vllm.utils import update_environment_variables
+from vllm.utils.system_utils import update_environment_variables


@multi_gpu_test(num_gpus=2)
--- a/tests/models/test_vision.py
+++ b/tests/models/test_vision.py
@ -19,8 +19,8 @@ from vllm.model_executor.models.vision import (
    run_dp_sharded_vision_model,
 )
 from vllm.platforms import current_platform
-from vllm.utils import update_environment_variables
 from vllm.utils.network_utils import get_open_port
+from vllm.utils.system_utils import update_environment_variables

 pytestmark = pytest.mark.cpu_test

--- a/tests/utils_/test_system_utils.py
+++ b/tests/utils_/test_system_utils.py
@ -0,0 +1,19 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import tempfile
+from pathlib import Path
+
+from vllm.utils.system_utils import unique_filepath
+
+
+def test_unique_filepath():
+    temp_dir = tempfile.mkdtemp()
+    path_fn = lambda i: Path(temp_dir) / f"file_{i}.txt"
+    paths = set()
+    for i in range(10):
+        path = unique_filepath(path_fn)
+        path.write_text("test")
+        paths.add(path)
+    assert len(paths) == 10
+    assert len(list(Path(temp_dir).glob("*.txt"))) == 10
--- a/tests/utils_/test_utils.py
+++ b/tests/utils_/test_utils.py
@ -19,7 +19,6 @@ from vllm.transformers_utils.detokenizer_utils import convert_ids_list_to_tokens
 from vllm.utils import (
    FlexibleArgumentParser,
    bind_kv_cache,
-    unique_filepath,
 )
 from ..utils import create_new_process_for_each_test, flat_product

@ -466,18 +465,6 @@ def test_load_config_file(tmp_path):
    os.remove(str(config_file_path))


-def test_unique_filepath():
-    temp_dir = tempfile.mkdtemp()
-    path_fn = lambda i: Path(temp_dir) / f"file_{i}.txt"
-    paths = set()
-    for i in range(10):
-        path = unique_filepath(path_fn)
-        path.write_text("test")
-        paths.add(path)
-    assert len(paths) == 10
-    assert len(list(Path(temp_dir).glob("*.txt"))) == 10
-
-
 def test_flat_product():
    # Check regular itertools.product behavior
    result1 = list(flat_product([1, 2, 3], ["a", "b"]))
--- a/tests/v1/worker/test_gpu_model_runner.py
+++ b/tests/v1/worker/test_gpu_model_runner.py
@ -21,8 +21,8 @@ from vllm.distributed.parallel_state import (
 from vllm.model_executor.layers.mamba.mamba_mixer2 import MambaMixer2
 from vllm.platforms import current_platform
 from vllm.sampling_params import SamplingParams
-from vllm.utils import update_environment_variables
 from vllm.utils.mem_constants import GiB_bytes
+from vllm.utils.system_utils import update_environment_variables
 from vllm.v1.core.kv_cache_utils import estimate_max_model_len, get_kv_cache_configs
 from vllm.v1.core.sched.output import CachedRequestData, NewRequestData, SchedulerOutput
 from vllm.v1.kv_cache_interface import (
--- a/vllm/compilation/pass_manager.py
+++ b/vllm/compilation/pass_manager.py
@ -8,7 +8,7 @@ from vllm import envs
 from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import set_env_var
+from vllm.utils.system_utils import set_env_var

 from .post_cleanup import PostCleanupPass
 from .vllm_inductor_pass import VllmInductorPass
--- a/vllm/compilation/vllm_inductor_pass.py
+++ b/vllm/compilation/vllm_inductor_pass.py
@ -114,7 +114,7 @@ class VllmPatternMatcherPass(VllmInductorPass):

        debug_dump_path.mkdir(parents=True, exist_ok=True)

-        from vllm.utils import unique_filepath
+        from vllm.utils.system_utils import unique_filepath

        file_path = unique_filepath(
            lambda i: debug_dump_path / f"patterns.{self.pass_name}.{i}.py"
--- a/vllm/distributed/device_communicators/all_reduce_utils.py
+++ b/vllm/distributed/device_communicators/all_reduce_utils.py
@ -22,7 +22,7 @@ from vllm.logger import init_logger
 from vllm.model_executor.layers.batch_invariant import (
    vllm_is_batch_invariant,
 )
-from vllm.utils import update_environment_variables
+from vllm.utils.system_utils import update_environment_variables
 from vllm.utils.torch_utils import cuda_device_count_stateless

 logger = init_logger(__name__)
--- a/vllm/entrypoints/cli/serve.py
+++ b/vllm/entrypoints/cli/serve.py
@ -18,12 +18,9 @@ from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_se
 from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import (
-    FlexibleArgumentParser,
-    decorate_logs,
-    set_process_title,
-)
+from vllm.utils import FlexibleArgumentParser
 from vllm.utils.network_utils import get_tcp_uri
+from vllm.utils.system_utils import decorate_logs, set_process_title
 from vllm.v1.engine.core import EngineCoreProc
 from vllm.v1.engine.utils import CoreEngineProcManager, launch_core_engines
 from vllm.v1.executor import Executor
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@ -108,13 +108,9 @@ from vllm.entrypoints.utils import (
 from vllm.logger import init_logger
 from vllm.reasoning import ReasoningParserManager
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import (
-    Device,
-    FlexibleArgumentParser,
-    decorate_logs,
-    set_ulimit,
-)
+from vllm.utils import Device, FlexibleArgumentParser, set_ulimit
 from vllm.utils.network_utils import is_valid_ipv6_address
+from vllm.utils.system_utils import decorate_logs
 from vllm.v1.engine.exceptions import EngineDeadError
 from vllm.v1.metrics.prometheus import get_prometheus_registry
 from vllm.version import __version__ as VLLM_VERSION
--- a/vllm/utils/init.py
+++ b/vllm/utils/init.py
@ -5,6 +5,7 @@ import contextlib
 import datetime
 import enum
 import getpass
+import importlib.util
 import inspect
 import json
 import multiprocessing
@ -33,13 +34,11 @@ from collections.abc import (
 )
 from concurrent.futures.process import ProcessPoolExecutor
 from functools import cache, partial, wraps
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, TextIO, TypeVar
+from typing import TYPE_CHECKING, Any, TypeVar

 import cloudpickle
 import psutil
 import regex as re
-import setproctitle
 import torch
 import yaml

@ -144,18 +143,6 @@ def random_uuid() -> str:
    return str(uuid.uuid4().hex)


-def update_environment_variables(envs: dict[str, str]):
-    for k, v in envs.items():
-        if k in os.environ and os.environ[k] != v:
-            logger.warning(
-                "Overwriting environment variable %s from '%s' to '%s'",
-                k,
-                os.environ[k],
-                v,
-            )
-        os.environ[k] = v
-
-
 def cdiv(a: int, b: int) -> int:
    """Ceiling division."""
    return -(a // -b)
@ -1061,70 +1048,44 @@ def check_use_alibi(model_config: ModelConfig) -> bool:
    )


-def set_process_title(
-    name: str, suffix: str = "", prefix: str = envs.VLLM_PROCESS_NAME_PREFIX
-) -> None:
+@cache
+def _has_module(module_name: str) -> bool:
+    """Return True if *module_name* can be found in the current environment.
+
+    The result is cached so that subsequent queries for the same module incur
+    no additional overhead.
    """
-    Set the current process title to a specific name with an
-    optional suffix.
-
-    Args:
-        name: The title to assign to the current process.
-        suffix: An optional suffix to append to the base name.
-        prefix: A prefix to prepend to the front separated by `::`.
-    """
-    if suffix:
-        name = f"{name}_{suffix}"
-    setproctitle.setproctitle(f"{prefix}::{name}")
+    return importlib.util.find_spec(module_name) is not None


-def _add_prefix(file: TextIO, worker_name: str, pid: int) -> None:
-    """Prepend each output line with process-specific prefix"""
+def has_pplx() -> bool:
+    """Whether the optional `pplx_kernels` package is available."""

-    prefix = f"{CYAN}({worker_name} pid={pid}){RESET} "
-    file_write = file.write
-
-    def write_with_prefix(s: str):
-        if not s:
-            return
-        if file.start_new_line:  # type: ignore[attr-defined]
-            file_write(prefix)
-        idx = 0
-        while (next_idx := s.find("\n", idx)) != -1:
-            next_idx += 1
-            file_write(s[idx:next_idx])
-            if next_idx == len(s):
-                file.start_new_line = True  # type: ignore[attr-defined]
-                return
-            file_write(prefix)
-            idx = next_idx
-        file_write(s[idx:])
-        file.start_new_line = False  # type: ignore[attr-defined]
-
-    file.start_new_line = True  # type: ignore[attr-defined]
-    file.write = write_with_prefix  # type: ignore[method-assign]
+    return _has_module("pplx_kernels")


-def decorate_logs(process_name: str | None = None) -> None:
-    """
-    Adds a process-specific prefix to each line of output written to stdout and
-    stderr.
+def has_deep_ep() -> bool:
+    """Whether the optional `deep_ep` package is available."""

-    This function is intended to be called before initializing the api_server,
-    engine_core, or worker classes, so that all subsequent output from the
-    process is prefixed with the process name and PID. This helps distinguish
-    log output from different processes in multi-process environments.
+    return _has_module("deep_ep")

-    Args:
-        process_name: Optional; the name of the process to use in the prefix.
-            If not provided, the current process name from the multiprocessing
-            context is used.
-    """
-    if process_name is None:
-        process_name = get_mp_context().current_process().name
-    pid = os.getpid()
-    _add_prefix(sys.stdout, process_name, pid)
-    _add_prefix(sys.stderr, process_name, pid)
+
+def has_deep_gemm() -> bool:
+    """Whether the optional `deep_gemm` package is available."""
+
+    return _has_module("deep_gemm")
+
+
+def has_triton_kernels() -> bool:
+    """Whether the optional `triton_kernels` package is available."""
+
+    return _has_module("triton_kernels")
+
+
+def has_tilelang() -> bool:
+    """Whether the optional `tilelang` package is available."""
+
+    return _has_module("tilelang")


 def length_from_prompt_token_ids_or_embeds(
@ -1149,36 +1110,3 @@ def length_from_prompt_token_ids_or_embeds(
                f" prompt_embeds={prompt_embeds_len}"
            )
        return prompt_token_len
-
-
-@contextlib.contextmanager
-def set_env_var(key, value):
-    old = os.environ.get(key)
-    os.environ[key] = value
-    try:
-        yield
-    finally:
-        if old is None:
-            del os.environ[key]
-        else:
-            os.environ[key] = old
-
-
-def unique_filepath(fn: Callable[[int], Path]) -> Path:
-    """
-    unique_filepath returns a unique path by trying
-    to include an integer in increasing order.
-
-    fn should be a callable that returns a path that
-    includes the passed int at a fixed location.
-
-    Note: This function has a TOCTOU race condition.
-    Caller should use atomic operations (e.g., open with 'x' mode)
-    when creating the file to ensure thread safety.
-    """
-    i = 0
-    while True:
-        p = fn(i)
-        if not p.exists():
-            return p
-        i += 1
--- a/vllm/utils/system_utils.py
+++ b/vllm/utils/system_utils.py
@ -0,0 +1,123 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from __future__ import annotations
+
+import contextlib
+import os
+import sys
+from collections.abc import Callable, Iterator
+from pathlib import Path
+from typing import TextIO
+
+try:
+    import setproctitle
+except ImportError:
+    setproctitle = None  # type: ignore[assignment]
+
+import vllm.envs as envs
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+CYAN = "\033[1;36m"
+RESET = "\033[0;0m"
+
+
+# Environment variable utilities
+
+
+def update_environment_variables(envs_dict: dict[str, str]):
+    """Update multiple environment variables with logging."""
+    for k, v in envs_dict.items():
+        if k in os.environ and os.environ[k] != v:
+            logger.warning(
+                "Overwriting environment variable %s from '%s' to '%s'",
+                k,
+                os.environ[k],
+                v,
+            )
+        os.environ[k] = v
+
+
+@contextlib.contextmanager
+def set_env_var(key: str, value: str) -> Iterator[None]:
+    """Temporarily set an environment variable."""
+    old = os.environ.get(key)
+    os.environ[key] = value
+    try:
+        yield
+    finally:
+        if old is None:
+            os.environ.pop(key, None)
+        else:
+            os.environ[key] = old
+
+
+# File path utilities
+
+
+def unique_filepath(fn: Callable[[int], Path]) -> Path:
+    """Generate a unique file path by trying incrementing integers.
+
+    Note: This function has a TOCTOU race condition.
+    Caller should use atomic operations (e.g., open with 'x' mode)
+    when creating the file to ensure thread safety.
+    """
+    i = 0
+    while True:
+        p = fn(i)
+        if not p.exists():
+            return p
+        i += 1
+
+
+# Process management utilities
+
+
+def set_process_title(
+    name: str, suffix: str = "", prefix: str = envs.VLLM_PROCESS_NAME_PREFIX
+) -> None:
+    """Set the current process title with optional suffix."""
+    if setproctitle is None:
+        return
+    if suffix:
+        name = f"{name}_{suffix}"
+    setproctitle.setproctitle(f"{prefix}::{name}")
+
+
+def _add_prefix(file: TextIO, worker_name: str, pid: int) -> None:
+    """Add colored prefix to file output for log decoration."""
+    prefix = f"{CYAN}({worker_name} pid={pid}){RESET} "
+    file_write = file.write
+
+    def write_with_prefix(s: str):
+        if not s:
+            return
+        if file.start_new_line:  # type: ignore[attr-defined]
+            file_write(prefix)
+        idx = 0
+        while (next_idx := s.find("\n", idx)) != -1:
+            next_idx += 1
+            file_write(s[idx:next_idx])
+            if next_idx == len(s):
+                file.start_new_line = True  # type: ignore[attr-defined]
+                return
+            file_write(prefix)
+            idx = next_idx
+        file_write(s[idx:])
+        file.start_new_line = False  # type: ignore[attr-defined]
+
+    file.start_new_line = True  # type: ignore[attr-defined]
+    file.write = write_with_prefix  # type: ignore[method-assign]
+
+
+def decorate_logs(process_name: str | None = None) -> None:
+    """Decorate stdout/stderr with process name and PID prefix."""
+    from vllm.utils import get_mp_context
+
+    if process_name is None:
+        process_name = get_mp_context().current_process().name
+    pid = os.getpid()
+    _add_prefix(sys.stdout, process_name, pid)
+    _add_prefix(sys.stderr, process_name, pid)
--- a/vllm/v1/engine/coordinator.py
+++ b/vllm/v1/engine/coordinator.py
@ -10,8 +10,9 @@ import zmq

 from vllm.config import ParallelConfig
 from vllm.logger import init_logger
-from vllm.utils import get_mp_context, set_process_title
+from vllm.utils import get_mp_context
 from vllm.utils.network_utils import make_zmq_socket
+from vllm.utils.system_utils import set_process_title
 from vllm.v1.engine import EngineCoreOutputs, EngineCoreRequestType
 from vllm.v1.serial_utils import MsgpackDecoder
 from vllm.v1.utils import get_engine_client_zmq_addr, shutdown
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@ -28,14 +28,11 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.cache import engine_receiver_cache_from_config
 from vllm.tasks import POOLING_TASKS, SupportedTask
 from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
-from vllm.utils import (
-    decorate_logs,
-    set_process_title,
-)
 from vllm.utils.gc_utils import maybe_attach_gc_debug_callback
 from vllm.utils.hashing import get_hash_fn_by_name
 from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.utils.network_utils import make_zmq_socket
+from vllm.utils.system_utils import decorate_logs, set_process_title
 from vllm.v1.core.kv_cache_utils import (
    BlockHash,
    generate_scheduler_kv_cache_config,
--- a/vllm/v1/executor/multiproc_executor.py
+++ b/vllm/v1/executor/multiproc_executor.py
@ -35,17 +35,13 @@ from vllm.distributed.parallel_state import (
 )
 from vllm.envs import enable_envs_cache
 from vllm.logger import init_logger
-from vllm.utils import (
-    _maybe_force_spawn,
-    decorate_logs,
-    get_mp_context,
-    set_process_title,
-)
+from vllm.utils import _maybe_force_spawn, get_mp_context
 from vllm.utils.network_utils import (
    get_distributed_init_method,
    get_loopback_ip,
    get_open_port,
 )
+from vllm.utils.system_utils import decorate_logs, set_process_title
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.executor.abstract import Executor, FailureCallback
 from vllm.v1.outputs import AsyncModelRunnerOutput, DraftTokenIds, ModelRunnerOutput
--- a/vllm/v1/worker/worker_base.py
+++ b/vllm/v1/worker/worker_base.py
@ -16,10 +16,10 @@ from vllm.multimodal.cache import worker_receiver_cache_from_config
 from vllm.utils import (
    enable_trace_function_call_for_thread,
    run_method,
-    update_environment_variables,
    warn_for_unimplemented_methods,
 )
 from vllm.utils.import_utils import resolve_obj_by_qualname
+from vllm.utils.system_utils import update_environment_variables
 from vllm.v1.kv_cache_interface import KVCacheSpec

 if TYPE_CHECKING: