[Chore] Separate out system utilities from vllm.utils (#27201)

Signed-off-by: dongbo910220 <1275604947@qq.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
dongbo910220 2025-10-23 04:25:25 +08:00 committed by GitHub
parent 5beacce2ea
commit a0003b56b0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 199 additions and 155 deletions

View File

@ -25,7 +25,7 @@ from vllm.distributed.parallel_state import (
initialize_model_parallel,
)
from vllm.platforms import current_platform
from vllm.utils import update_environment_variables
from vllm.utils.system_utils import update_environment_variables
from ..models.registry import HF_EXAMPLE_MODELS
from ..utils import (

View File

@ -31,7 +31,7 @@ from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
GroupShape,
)
from vllm.platforms import current_platform
from vllm.utils import update_environment_variables
from vllm.utils.system_utils import update_environment_variables
from ..utils import has_module_attribute, multi_gpu_test
from .backend import TestBackend

View File

@ -29,7 +29,7 @@ from vllm.distributed.parallel_state import (
from vllm.model_executor.layers.layernorm import RMSNorm
from vllm.model_executor.layers.quantization.utils.w8a8_utils import Fp8LinearOp
from vllm.platforms import current_platform
from vllm.utils import update_environment_variables
from vllm.utils.system_utils import update_environment_variables
from ..utils import multi_gpu_test
from .backend import TestBackend

View File

@ -15,7 +15,7 @@ from vllm.distributed.parallel_state import (
get_tp_group,
init_distributed_environment,
)
from vllm.utils import update_environment_variables
from vllm.utils.system_utils import update_environment_variables
def distributed_run(fn, world_size):

View File

@ -23,7 +23,7 @@ from vllm.distributed.parallel_state import (
initialize_model_parallel,
)
from vllm.platforms import current_platform
from vllm.utils import update_environment_variables
from vllm.utils.system_utils import update_environment_variables
torch.manual_seed(42)
random.seed(44)

View File

@ -18,7 +18,7 @@ from vllm.distributed.parallel_state import (
graph_capture,
init_distributed_environment,
)
from vllm.utils import update_environment_variables
from vllm.utils.system_utils import update_environment_variables
def distributed_run(fn, world_size):

View File

@ -10,8 +10,8 @@ import torch.distributed as dist
from vllm.distributed.device_communicators.shm_broadcast import MessageQueue
from vllm.distributed.utils import StatelessProcessGroup
from vllm.utils import update_environment_variables
from vllm.utils.network_utils import get_open_port
from vllm.utils.system_utils import update_environment_variables
def get_arrays(n: int, seed: int = 0) -> list[np.ndarray]:

View File

@ -23,7 +23,7 @@ from vllm.distributed.parallel_state import (
from vllm.engine.arg_utils import EngineArgs
from vllm.engine.llm_engine import LLMEngine
from vllm.platforms import current_platform
from vllm.utils import update_environment_variables
from vllm.utils.system_utils import update_environment_variables
torch.manual_seed(42)
random.seed(44)

View File

@ -10,8 +10,8 @@ import torch
import vllm.envs as envs
from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator
from vllm.distributed.utils import StatelessProcessGroup
from vllm.utils import update_environment_variables
from vllm.utils.network_utils import get_open_port
from vllm.utils.system_utils import update_environment_variables
from vllm.utils.torch_utils import cuda_device_count_stateless
from ..utils import multi_gpu_test

View File

@ -13,7 +13,7 @@ from vllm.distributed.parallel_state import (
)
from vllm.model_executor.layers.mamba.mamba_mixer2 import Mixer2RMSNormGated
from vllm.platforms import current_platform
from vllm.utils import update_environment_variables
from vllm.utils.system_utils import update_environment_variables
@multi_gpu_test(num_gpus=2)

View File

@ -19,8 +19,8 @@ from vllm.model_executor.models.vision import (
run_dp_sharded_vision_model,
)
from vllm.platforms import current_platform
from vllm.utils import update_environment_variables
from vllm.utils.network_utils import get_open_port
from vllm.utils.system_utils import update_environment_variables
pytestmark = pytest.mark.cpu_test

View File

@ -0,0 +1,19 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import tempfile
from pathlib import Path
from vllm.utils.system_utils import unique_filepath
def test_unique_filepath():
temp_dir = tempfile.mkdtemp()
path_fn = lambda i: Path(temp_dir) / f"file_{i}.txt"
paths = set()
for i in range(10):
path = unique_filepath(path_fn)
path.write_text("test")
paths.add(path)
assert len(paths) == 10
assert len(list(Path(temp_dir).glob("*.txt"))) == 10

View File

@ -19,7 +19,6 @@ from vllm.transformers_utils.detokenizer_utils import convert_ids_list_to_tokens
from vllm.utils import (
FlexibleArgumentParser,
bind_kv_cache,
unique_filepath,
)
from ..utils import create_new_process_for_each_test, flat_product
@ -466,18 +465,6 @@ def test_load_config_file(tmp_path):
os.remove(str(config_file_path))
def test_unique_filepath():
temp_dir = tempfile.mkdtemp()
path_fn = lambda i: Path(temp_dir) / f"file_{i}.txt"
paths = set()
for i in range(10):
path = unique_filepath(path_fn)
path.write_text("test")
paths.add(path)
assert len(paths) == 10
assert len(list(Path(temp_dir).glob("*.txt"))) == 10
def test_flat_product():
# Check regular itertools.product behavior
result1 = list(flat_product([1, 2, 3], ["a", "b"]))

View File

@ -21,8 +21,8 @@ from vllm.distributed.parallel_state import (
from vllm.model_executor.layers.mamba.mamba_mixer2 import MambaMixer2
from vllm.platforms import current_platform
from vllm.sampling_params import SamplingParams
from vllm.utils import update_environment_variables
from vllm.utils.mem_constants import GiB_bytes
from vllm.utils.system_utils import update_environment_variables
from vllm.v1.core.kv_cache_utils import estimate_max_model_len, get_kv_cache_configs
from vllm.v1.core.sched.output import CachedRequestData, NewRequestData, SchedulerOutput
from vllm.v1.kv_cache_interface import (

View File

@ -8,7 +8,7 @@ from vllm import envs
from vllm.config import VllmConfig, set_current_vllm_config
from vllm.logger import init_logger
from vllm.platforms import current_platform
from vllm.utils import set_env_var
from vllm.utils.system_utils import set_env_var
from .post_cleanup import PostCleanupPass
from .vllm_inductor_pass import VllmInductorPass

View File

@ -114,7 +114,7 @@ class VllmPatternMatcherPass(VllmInductorPass):
debug_dump_path.mkdir(parents=True, exist_ok=True)
from vllm.utils import unique_filepath
from vllm.utils.system_utils import unique_filepath
file_path = unique_filepath(
lambda i: debug_dump_path / f"patterns.{self.pass_name}.{i}.py"

View File

@ -22,7 +22,7 @@ from vllm.logger import init_logger
from vllm.model_executor.layers.batch_invariant import (
vllm_is_batch_invariant,
)
from vllm.utils import update_environment_variables
from vllm.utils.system_utils import update_environment_variables
from vllm.utils.torch_utils import cuda_device_count_stateless
logger = init_logger(__name__)

View File

@ -18,12 +18,9 @@ from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_se
from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG
from vllm.logger import init_logger
from vllm.usage.usage_lib import UsageContext
from vllm.utils import (
FlexibleArgumentParser,
decorate_logs,
set_process_title,
)
from vllm.utils import FlexibleArgumentParser
from vllm.utils.network_utils import get_tcp_uri
from vllm.utils.system_utils import decorate_logs, set_process_title
from vllm.v1.engine.core import EngineCoreProc
from vllm.v1.engine.utils import CoreEngineProcManager, launch_core_engines
from vllm.v1.executor import Executor

View File

@ -108,13 +108,9 @@ from vllm.entrypoints.utils import (
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParserManager
from vllm.usage.usage_lib import UsageContext
from vllm.utils import (
Device,
FlexibleArgumentParser,
decorate_logs,
set_ulimit,
)
from vllm.utils import Device, FlexibleArgumentParser, set_ulimit
from vllm.utils.network_utils import is_valid_ipv6_address
from vllm.utils.system_utils import decorate_logs
from vllm.v1.engine.exceptions import EngineDeadError
from vllm.v1.metrics.prometheus import get_prometheus_registry
from vllm.version import __version__ as VLLM_VERSION

View File

@ -5,6 +5,7 @@ import contextlib
import datetime
import enum
import getpass
import importlib.util
import inspect
import json
import multiprocessing
@ -33,13 +34,11 @@ from collections.abc import (
)
from concurrent.futures.process import ProcessPoolExecutor
from functools import cache, partial, wraps
from pathlib import Path
from typing import TYPE_CHECKING, Any, TextIO, TypeVar
from typing import TYPE_CHECKING, Any, TypeVar
import cloudpickle
import psutil
import regex as re
import setproctitle
import torch
import yaml
@ -144,18 +143,6 @@ def random_uuid() -> str:
return str(uuid.uuid4().hex)
def update_environment_variables(envs: dict[str, str]):
for k, v in envs.items():
if k in os.environ and os.environ[k] != v:
logger.warning(
"Overwriting environment variable %s from '%s' to '%s'",
k,
os.environ[k],
v,
)
os.environ[k] = v
def cdiv(a: int, b: int) -> int:
"""Ceiling division."""
return -(a // -b)
@ -1061,70 +1048,44 @@ def check_use_alibi(model_config: ModelConfig) -> bool:
)
def set_process_title(
name: str, suffix: str = "", prefix: str = envs.VLLM_PROCESS_NAME_PREFIX
) -> None:
@cache
def _has_module(module_name: str) -> bool:
"""Return True if *module_name* can be found in the current environment.
The result is cached so that subsequent queries for the same module incur
no additional overhead.
"""
Set the current process title to a specific name with an
optional suffix.
Args:
name: The title to assign to the current process.
suffix: An optional suffix to append to the base name.
prefix: A prefix to prepend to the front separated by `::`.
"""
if suffix:
name = f"{name}_{suffix}"
setproctitle.setproctitle(f"{prefix}::{name}")
return importlib.util.find_spec(module_name) is not None
def _add_prefix(file: TextIO, worker_name: str, pid: int) -> None:
"""Prepend each output line with process-specific prefix"""
def has_pplx() -> bool:
"""Whether the optional `pplx_kernels` package is available."""
prefix = f"{CYAN}({worker_name} pid={pid}){RESET} "
file_write = file.write
def write_with_prefix(s: str):
if not s:
return
if file.start_new_line: # type: ignore[attr-defined]
file_write(prefix)
idx = 0
while (next_idx := s.find("\n", idx)) != -1:
next_idx += 1
file_write(s[idx:next_idx])
if next_idx == len(s):
file.start_new_line = True # type: ignore[attr-defined]
return
file_write(prefix)
idx = next_idx
file_write(s[idx:])
file.start_new_line = False # type: ignore[attr-defined]
file.start_new_line = True # type: ignore[attr-defined]
file.write = write_with_prefix # type: ignore[method-assign]
return _has_module("pplx_kernels")
def decorate_logs(process_name: str | None = None) -> None:
"""
Adds a process-specific prefix to each line of output written to stdout and
stderr.
def has_deep_ep() -> bool:
"""Whether the optional `deep_ep` package is available."""
This function is intended to be called before initializing the api_server,
engine_core, or worker classes, so that all subsequent output from the
process is prefixed with the process name and PID. This helps distinguish
log output from different processes in multi-process environments.
return _has_module("deep_ep")
Args:
process_name: Optional; the name of the process to use in the prefix.
If not provided, the current process name from the multiprocessing
context is used.
"""
if process_name is None:
process_name = get_mp_context().current_process().name
pid = os.getpid()
_add_prefix(sys.stdout, process_name, pid)
_add_prefix(sys.stderr, process_name, pid)
def has_deep_gemm() -> bool:
"""Whether the optional `deep_gemm` package is available."""
return _has_module("deep_gemm")
def has_triton_kernels() -> bool:
"""Whether the optional `triton_kernels` package is available."""
return _has_module("triton_kernels")
def has_tilelang() -> bool:
"""Whether the optional `tilelang` package is available."""
return _has_module("tilelang")
def length_from_prompt_token_ids_or_embeds(
@ -1149,36 +1110,3 @@ def length_from_prompt_token_ids_or_embeds(
f" prompt_embeds={prompt_embeds_len}"
)
return prompt_token_len
@contextlib.contextmanager
def set_env_var(key, value):
old = os.environ.get(key)
os.environ[key] = value
try:
yield
finally:
if old is None:
del os.environ[key]
else:
os.environ[key] = old
def unique_filepath(fn: Callable[[int], Path]) -> Path:
"""
unique_filepath returns a unique path by trying
to include an integer in increasing order.
fn should be a callable that returns a path that
includes the passed int at a fixed location.
Note: This function has a TOCTOU race condition.
Caller should use atomic operations (e.g., open with 'x' mode)
when creating the file to ensure thread safety.
"""
i = 0
while True:
p = fn(i)
if not p.exists():
return p
i += 1

123
vllm/utils/system_utils.py Normal file
View File

@ -0,0 +1,123 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from __future__ import annotations
import contextlib
import os
import sys
from collections.abc import Callable, Iterator
from pathlib import Path
from typing import TextIO
try:
import setproctitle
except ImportError:
setproctitle = None # type: ignore[assignment]
import vllm.envs as envs
from vllm.logger import init_logger
logger = init_logger(__name__)
CYAN = "\033[1;36m"
RESET = "\033[0;0m"
# Environment variable utilities
def update_environment_variables(envs_dict: dict[str, str]):
"""Update multiple environment variables with logging."""
for k, v in envs_dict.items():
if k in os.environ and os.environ[k] != v:
logger.warning(
"Overwriting environment variable %s from '%s' to '%s'",
k,
os.environ[k],
v,
)
os.environ[k] = v
@contextlib.contextmanager
def set_env_var(key: str, value: str) -> Iterator[None]:
"""Temporarily set an environment variable."""
old = os.environ.get(key)
os.environ[key] = value
try:
yield
finally:
if old is None:
os.environ.pop(key, None)
else:
os.environ[key] = old
# File path utilities
def unique_filepath(fn: Callable[[int], Path]) -> Path:
"""Generate a unique file path by trying incrementing integers.
Note: This function has a TOCTOU race condition.
Caller should use atomic operations (e.g., open with 'x' mode)
when creating the file to ensure thread safety.
"""
i = 0
while True:
p = fn(i)
if not p.exists():
return p
i += 1
# Process management utilities
def set_process_title(
name: str, suffix: str = "", prefix: str = envs.VLLM_PROCESS_NAME_PREFIX
) -> None:
"""Set the current process title with optional suffix."""
if setproctitle is None:
return
if suffix:
name = f"{name}_{suffix}"
setproctitle.setproctitle(f"{prefix}::{name}")
def _add_prefix(file: TextIO, worker_name: str, pid: int) -> None:
"""Add colored prefix to file output for log decoration."""
prefix = f"{CYAN}({worker_name} pid={pid}){RESET} "
file_write = file.write
def write_with_prefix(s: str):
if not s:
return
if file.start_new_line: # type: ignore[attr-defined]
file_write(prefix)
idx = 0
while (next_idx := s.find("\n", idx)) != -1:
next_idx += 1
file_write(s[idx:next_idx])
if next_idx == len(s):
file.start_new_line = True # type: ignore[attr-defined]
return
file_write(prefix)
idx = next_idx
file_write(s[idx:])
file.start_new_line = False # type: ignore[attr-defined]
file.start_new_line = True # type: ignore[attr-defined]
file.write = write_with_prefix # type: ignore[method-assign]
def decorate_logs(process_name: str | None = None) -> None:
"""Decorate stdout/stderr with process name and PID prefix."""
from vllm.utils import get_mp_context
if process_name is None:
process_name = get_mp_context().current_process().name
pid = os.getpid()
_add_prefix(sys.stdout, process_name, pid)
_add_prefix(sys.stderr, process_name, pid)

View File

@ -10,8 +10,9 @@ import zmq
from vllm.config import ParallelConfig
from vllm.logger import init_logger
from vllm.utils import get_mp_context, set_process_title
from vllm.utils import get_mp_context
from vllm.utils.network_utils import make_zmq_socket
from vllm.utils.system_utils import set_process_title
from vllm.v1.engine import EngineCoreOutputs, EngineCoreRequestType
from vllm.v1.serial_utils import MsgpackDecoder
from vllm.v1.utils import get_engine_client_zmq_addr, shutdown

View File

@ -28,14 +28,11 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.cache import engine_receiver_cache_from_config
from vllm.tasks import POOLING_TASKS, SupportedTask
from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
from vllm.utils import (
decorate_logs,
set_process_title,
)
from vllm.utils.gc_utils import maybe_attach_gc_debug_callback
from vllm.utils.hashing import get_hash_fn_by_name
from vllm.utils.import_utils import resolve_obj_by_qualname
from vllm.utils.network_utils import make_zmq_socket
from vllm.utils.system_utils import decorate_logs, set_process_title
from vllm.v1.core.kv_cache_utils import (
BlockHash,
generate_scheduler_kv_cache_config,

View File

@ -35,17 +35,13 @@ from vllm.distributed.parallel_state import (
)
from vllm.envs import enable_envs_cache
from vllm.logger import init_logger
from vllm.utils import (
_maybe_force_spawn,
decorate_logs,
get_mp_context,
set_process_title,
)
from vllm.utils import _maybe_force_spawn, get_mp_context
from vllm.utils.network_utils import (
get_distributed_init_method,
get_loopback_ip,
get_open_port,
)
from vllm.utils.system_utils import decorate_logs, set_process_title
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.executor.abstract import Executor, FailureCallback
from vllm.v1.outputs import AsyncModelRunnerOutput, DraftTokenIds, ModelRunnerOutput

View File

@ -16,10 +16,10 @@ from vllm.multimodal.cache import worker_receiver_cache_from_config
from vllm.utils import (
enable_trace_function_call_for_thread,
run_method,
update_environment_variables,
warn_for_unimplemented_methods,
)
from vllm.utils.import_utils import resolve_obj_by_qualname
from vllm.utils.system_utils import update_environment_variables
from vllm.v1.kv_cache_interface import KVCacheSpec
if TYPE_CHECKING: