mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 02:34:56 +08:00
[Chore] Separate out system utilities from vllm.utils (#27201)
Signed-off-by: dongbo910220 <1275604947@qq.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
parent
5beacce2ea
commit
a0003b56b0
@ -25,7 +25,7 @@ from vllm.distributed.parallel_state import (
|
||||
initialize_model_parallel,
|
||||
)
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
|
||||
from ..models.registry import HF_EXAMPLE_MODELS
|
||||
from ..utils import (
|
||||
|
||||
@ -31,7 +31,7 @@ from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
|
||||
GroupShape,
|
||||
)
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
|
||||
from ..utils import has_module_attribute, multi_gpu_test
|
||||
from .backend import TestBackend
|
||||
|
||||
@ -29,7 +29,7 @@ from vllm.distributed.parallel_state import (
|
||||
from vllm.model_executor.layers.layernorm import RMSNorm
|
||||
from vllm.model_executor.layers.quantization.utils.w8a8_utils import Fp8LinearOp
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
|
||||
from ..utils import multi_gpu_test
|
||||
from .backend import TestBackend
|
||||
|
||||
@ -15,7 +15,7 @@ from vllm.distributed.parallel_state import (
|
||||
get_tp_group,
|
||||
init_distributed_environment,
|
||||
)
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
|
||||
|
||||
def distributed_run(fn, world_size):
|
||||
|
||||
@ -23,7 +23,7 @@ from vllm.distributed.parallel_state import (
|
||||
initialize_model_parallel,
|
||||
)
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
|
||||
torch.manual_seed(42)
|
||||
random.seed(44)
|
||||
|
||||
@ -18,7 +18,7 @@ from vllm.distributed.parallel_state import (
|
||||
graph_capture,
|
||||
init_distributed_environment,
|
||||
)
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
|
||||
|
||||
def distributed_run(fn, world_size):
|
||||
|
||||
@ -10,8 +10,8 @@ import torch.distributed as dist
|
||||
|
||||
from vllm.distributed.device_communicators.shm_broadcast import MessageQueue
|
||||
from vllm.distributed.utils import StatelessProcessGroup
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.network_utils import get_open_port
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
|
||||
|
||||
def get_arrays(n: int, seed: int = 0) -> list[np.ndarray]:
|
||||
|
||||
@ -23,7 +23,7 @@ from vllm.distributed.parallel_state import (
|
||||
from vllm.engine.arg_utils import EngineArgs
|
||||
from vllm.engine.llm_engine import LLMEngine
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
|
||||
torch.manual_seed(42)
|
||||
random.seed(44)
|
||||
|
||||
@ -10,8 +10,8 @@ import torch
|
||||
import vllm.envs as envs
|
||||
from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator
|
||||
from vllm.distributed.utils import StatelessProcessGroup
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.network_utils import get_open_port
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
from vllm.utils.torch_utils import cuda_device_count_stateless
|
||||
|
||||
from ..utils import multi_gpu_test
|
||||
|
||||
@ -13,7 +13,7 @@ from vllm.distributed.parallel_state import (
|
||||
)
|
||||
from vllm.model_executor.layers.mamba.mamba_mixer2 import Mixer2RMSNormGated
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
|
||||
|
||||
@multi_gpu_test(num_gpus=2)
|
||||
|
||||
@ -19,8 +19,8 @@ from vllm.model_executor.models.vision import (
|
||||
run_dp_sharded_vision_model,
|
||||
)
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.network_utils import get_open_port
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
|
||||
19
tests/utils_/test_system_utils.py
Normal file
19
tests/utils_/test_system_utils.py
Normal file
@ -0,0 +1,19 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from vllm.utils.system_utils import unique_filepath
|
||||
|
||||
|
||||
def test_unique_filepath():
|
||||
temp_dir = tempfile.mkdtemp()
|
||||
path_fn = lambda i: Path(temp_dir) / f"file_{i}.txt"
|
||||
paths = set()
|
||||
for i in range(10):
|
||||
path = unique_filepath(path_fn)
|
||||
path.write_text("test")
|
||||
paths.add(path)
|
||||
assert len(paths) == 10
|
||||
assert len(list(Path(temp_dir).glob("*.txt"))) == 10
|
||||
@ -19,7 +19,6 @@ from vllm.transformers_utils.detokenizer_utils import convert_ids_list_to_tokens
|
||||
from vllm.utils import (
|
||||
FlexibleArgumentParser,
|
||||
bind_kv_cache,
|
||||
unique_filepath,
|
||||
)
|
||||
from ..utils import create_new_process_for_each_test, flat_product
|
||||
|
||||
@ -466,18 +465,6 @@ def test_load_config_file(tmp_path):
|
||||
os.remove(str(config_file_path))
|
||||
|
||||
|
||||
def test_unique_filepath():
|
||||
temp_dir = tempfile.mkdtemp()
|
||||
path_fn = lambda i: Path(temp_dir) / f"file_{i}.txt"
|
||||
paths = set()
|
||||
for i in range(10):
|
||||
path = unique_filepath(path_fn)
|
||||
path.write_text("test")
|
||||
paths.add(path)
|
||||
assert len(paths) == 10
|
||||
assert len(list(Path(temp_dir).glob("*.txt"))) == 10
|
||||
|
||||
|
||||
def test_flat_product():
|
||||
# Check regular itertools.product behavior
|
||||
result1 = list(flat_product([1, 2, 3], ["a", "b"]))
|
||||
|
||||
@ -21,8 +21,8 @@ from vllm.distributed.parallel_state import (
|
||||
from vllm.model_executor.layers.mamba.mamba_mixer2 import MambaMixer2
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.mem_constants import GiB_bytes
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
from vllm.v1.core.kv_cache_utils import estimate_max_model_len, get_kv_cache_configs
|
||||
from vllm.v1.core.sched.output import CachedRequestData, NewRequestData, SchedulerOutput
|
||||
from vllm.v1.kv_cache_interface import (
|
||||
|
||||
@ -8,7 +8,7 @@ from vllm import envs
|
||||
from vllm.config import VllmConfig, set_current_vllm_config
|
||||
from vllm.logger import init_logger
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils import set_env_var
|
||||
from vllm.utils.system_utils import set_env_var
|
||||
|
||||
from .post_cleanup import PostCleanupPass
|
||||
from .vllm_inductor_pass import VllmInductorPass
|
||||
|
||||
@ -114,7 +114,7 @@ class VllmPatternMatcherPass(VllmInductorPass):
|
||||
|
||||
debug_dump_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
from vllm.utils import unique_filepath
|
||||
from vllm.utils.system_utils import unique_filepath
|
||||
|
||||
file_path = unique_filepath(
|
||||
lambda i: debug_dump_path / f"patterns.{self.pass_name}.{i}.py"
|
||||
|
||||
@ -22,7 +22,7 @@ from vllm.logger import init_logger
|
||||
from vllm.model_executor.layers.batch_invariant import (
|
||||
vllm_is_batch_invariant,
|
||||
)
|
||||
from vllm.utils import update_environment_variables
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
from vllm.utils.torch_utils import cuda_device_count_stateless
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
@ -18,12 +18,9 @@ from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_se
|
||||
from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG
|
||||
from vllm.logger import init_logger
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import (
|
||||
FlexibleArgumentParser,
|
||||
decorate_logs,
|
||||
set_process_title,
|
||||
)
|
||||
from vllm.utils import FlexibleArgumentParser
|
||||
from vllm.utils.network_utils import get_tcp_uri
|
||||
from vllm.utils.system_utils import decorate_logs, set_process_title
|
||||
from vllm.v1.engine.core import EngineCoreProc
|
||||
from vllm.v1.engine.utils import CoreEngineProcManager, launch_core_engines
|
||||
from vllm.v1.executor import Executor
|
||||
|
||||
@ -108,13 +108,9 @@ from vllm.entrypoints.utils import (
|
||||
from vllm.logger import init_logger
|
||||
from vllm.reasoning import ReasoningParserManager
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import (
|
||||
Device,
|
||||
FlexibleArgumentParser,
|
||||
decorate_logs,
|
||||
set_ulimit,
|
||||
)
|
||||
from vllm.utils import Device, FlexibleArgumentParser, set_ulimit
|
||||
from vllm.utils.network_utils import is_valid_ipv6_address
|
||||
from vllm.utils.system_utils import decorate_logs
|
||||
from vllm.v1.engine.exceptions import EngineDeadError
|
||||
from vllm.v1.metrics.prometheus import get_prometheus_registry
|
||||
from vllm.version import __version__ as VLLM_VERSION
|
||||
|
||||
@ -5,6 +5,7 @@ import contextlib
|
||||
import datetime
|
||||
import enum
|
||||
import getpass
|
||||
import importlib.util
|
||||
import inspect
|
||||
import json
|
||||
import multiprocessing
|
||||
@ -33,13 +34,11 @@ from collections.abc import (
|
||||
)
|
||||
from concurrent.futures.process import ProcessPoolExecutor
|
||||
from functools import cache, partial, wraps
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, TextIO, TypeVar
|
||||
from typing import TYPE_CHECKING, Any, TypeVar
|
||||
|
||||
import cloudpickle
|
||||
import psutil
|
||||
import regex as re
|
||||
import setproctitle
|
||||
import torch
|
||||
import yaml
|
||||
|
||||
@ -144,18 +143,6 @@ def random_uuid() -> str:
|
||||
return str(uuid.uuid4().hex)
|
||||
|
||||
|
||||
def update_environment_variables(envs: dict[str, str]):
|
||||
for k, v in envs.items():
|
||||
if k in os.environ and os.environ[k] != v:
|
||||
logger.warning(
|
||||
"Overwriting environment variable %s from '%s' to '%s'",
|
||||
k,
|
||||
os.environ[k],
|
||||
v,
|
||||
)
|
||||
os.environ[k] = v
|
||||
|
||||
|
||||
def cdiv(a: int, b: int) -> int:
|
||||
"""Ceiling division."""
|
||||
return -(a // -b)
|
||||
@ -1061,70 +1048,44 @@ def check_use_alibi(model_config: ModelConfig) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def set_process_title(
|
||||
name: str, suffix: str = "", prefix: str = envs.VLLM_PROCESS_NAME_PREFIX
|
||||
) -> None:
|
||||
@cache
|
||||
def _has_module(module_name: str) -> bool:
|
||||
"""Return True if *module_name* can be found in the current environment.
|
||||
|
||||
The result is cached so that subsequent queries for the same module incur
|
||||
no additional overhead.
|
||||
"""
|
||||
Set the current process title to a specific name with an
|
||||
optional suffix.
|
||||
|
||||
Args:
|
||||
name: The title to assign to the current process.
|
||||
suffix: An optional suffix to append to the base name.
|
||||
prefix: A prefix to prepend to the front separated by `::`.
|
||||
"""
|
||||
if suffix:
|
||||
name = f"{name}_{suffix}"
|
||||
setproctitle.setproctitle(f"{prefix}::{name}")
|
||||
return importlib.util.find_spec(module_name) is not None
|
||||
|
||||
|
||||
def _add_prefix(file: TextIO, worker_name: str, pid: int) -> None:
|
||||
"""Prepend each output line with process-specific prefix"""
|
||||
def has_pplx() -> bool:
|
||||
"""Whether the optional `pplx_kernels` package is available."""
|
||||
|
||||
prefix = f"{CYAN}({worker_name} pid={pid}){RESET} "
|
||||
file_write = file.write
|
||||
|
||||
def write_with_prefix(s: str):
|
||||
if not s:
|
||||
return
|
||||
if file.start_new_line: # type: ignore[attr-defined]
|
||||
file_write(prefix)
|
||||
idx = 0
|
||||
while (next_idx := s.find("\n", idx)) != -1:
|
||||
next_idx += 1
|
||||
file_write(s[idx:next_idx])
|
||||
if next_idx == len(s):
|
||||
file.start_new_line = True # type: ignore[attr-defined]
|
||||
return
|
||||
file_write(prefix)
|
||||
idx = next_idx
|
||||
file_write(s[idx:])
|
||||
file.start_new_line = False # type: ignore[attr-defined]
|
||||
|
||||
file.start_new_line = True # type: ignore[attr-defined]
|
||||
file.write = write_with_prefix # type: ignore[method-assign]
|
||||
return _has_module("pplx_kernels")
|
||||
|
||||
|
||||
def decorate_logs(process_name: str | None = None) -> None:
|
||||
"""
|
||||
Adds a process-specific prefix to each line of output written to stdout and
|
||||
stderr.
|
||||
def has_deep_ep() -> bool:
|
||||
"""Whether the optional `deep_ep` package is available."""
|
||||
|
||||
This function is intended to be called before initializing the api_server,
|
||||
engine_core, or worker classes, so that all subsequent output from the
|
||||
process is prefixed with the process name and PID. This helps distinguish
|
||||
log output from different processes in multi-process environments.
|
||||
return _has_module("deep_ep")
|
||||
|
||||
Args:
|
||||
process_name: Optional; the name of the process to use in the prefix.
|
||||
If not provided, the current process name from the multiprocessing
|
||||
context is used.
|
||||
"""
|
||||
if process_name is None:
|
||||
process_name = get_mp_context().current_process().name
|
||||
pid = os.getpid()
|
||||
_add_prefix(sys.stdout, process_name, pid)
|
||||
_add_prefix(sys.stderr, process_name, pid)
|
||||
|
||||
def has_deep_gemm() -> bool:
|
||||
"""Whether the optional `deep_gemm` package is available."""
|
||||
|
||||
return _has_module("deep_gemm")
|
||||
|
||||
|
||||
def has_triton_kernels() -> bool:
|
||||
"""Whether the optional `triton_kernels` package is available."""
|
||||
|
||||
return _has_module("triton_kernels")
|
||||
|
||||
|
||||
def has_tilelang() -> bool:
|
||||
"""Whether the optional `tilelang` package is available."""
|
||||
|
||||
return _has_module("tilelang")
|
||||
|
||||
|
||||
def length_from_prompt_token_ids_or_embeds(
|
||||
@ -1149,36 +1110,3 @@ def length_from_prompt_token_ids_or_embeds(
|
||||
f" prompt_embeds={prompt_embeds_len}"
|
||||
)
|
||||
return prompt_token_len
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def set_env_var(key, value):
|
||||
old = os.environ.get(key)
|
||||
os.environ[key] = value
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
if old is None:
|
||||
del os.environ[key]
|
||||
else:
|
||||
os.environ[key] = old
|
||||
|
||||
|
||||
def unique_filepath(fn: Callable[[int], Path]) -> Path:
|
||||
"""
|
||||
unique_filepath returns a unique path by trying
|
||||
to include an integer in increasing order.
|
||||
|
||||
fn should be a callable that returns a path that
|
||||
includes the passed int at a fixed location.
|
||||
|
||||
Note: This function has a TOCTOU race condition.
|
||||
Caller should use atomic operations (e.g., open with 'x' mode)
|
||||
when creating the file to ensure thread safety.
|
||||
"""
|
||||
i = 0
|
||||
while True:
|
||||
p = fn(i)
|
||||
if not p.exists():
|
||||
return p
|
||||
i += 1
|
||||
|
||||
123
vllm/utils/system_utils.py
Normal file
123
vllm/utils/system_utils.py
Normal file
@ -0,0 +1,123 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import os
|
||||
import sys
|
||||
from collections.abc import Callable, Iterator
|
||||
from pathlib import Path
|
||||
from typing import TextIO
|
||||
|
||||
try:
|
||||
import setproctitle
|
||||
except ImportError:
|
||||
setproctitle = None # type: ignore[assignment]
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
CYAN = "\033[1;36m"
|
||||
RESET = "\033[0;0m"
|
||||
|
||||
|
||||
# Environment variable utilities
|
||||
|
||||
|
||||
def update_environment_variables(envs_dict: dict[str, str]):
|
||||
"""Update multiple environment variables with logging."""
|
||||
for k, v in envs_dict.items():
|
||||
if k in os.environ and os.environ[k] != v:
|
||||
logger.warning(
|
||||
"Overwriting environment variable %s from '%s' to '%s'",
|
||||
k,
|
||||
os.environ[k],
|
||||
v,
|
||||
)
|
||||
os.environ[k] = v
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def set_env_var(key: str, value: str) -> Iterator[None]:
|
||||
"""Temporarily set an environment variable."""
|
||||
old = os.environ.get(key)
|
||||
os.environ[key] = value
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
if old is None:
|
||||
os.environ.pop(key, None)
|
||||
else:
|
||||
os.environ[key] = old
|
||||
|
||||
|
||||
# File path utilities
|
||||
|
||||
|
||||
def unique_filepath(fn: Callable[[int], Path]) -> Path:
|
||||
"""Generate a unique file path by trying incrementing integers.
|
||||
|
||||
Note: This function has a TOCTOU race condition.
|
||||
Caller should use atomic operations (e.g., open with 'x' mode)
|
||||
when creating the file to ensure thread safety.
|
||||
"""
|
||||
i = 0
|
||||
while True:
|
||||
p = fn(i)
|
||||
if not p.exists():
|
||||
return p
|
||||
i += 1
|
||||
|
||||
|
||||
# Process management utilities
|
||||
|
||||
|
||||
def set_process_title(
|
||||
name: str, suffix: str = "", prefix: str = envs.VLLM_PROCESS_NAME_PREFIX
|
||||
) -> None:
|
||||
"""Set the current process title with optional suffix."""
|
||||
if setproctitle is None:
|
||||
return
|
||||
if suffix:
|
||||
name = f"{name}_{suffix}"
|
||||
setproctitle.setproctitle(f"{prefix}::{name}")
|
||||
|
||||
|
||||
def _add_prefix(file: TextIO, worker_name: str, pid: int) -> None:
|
||||
"""Add colored prefix to file output for log decoration."""
|
||||
prefix = f"{CYAN}({worker_name} pid={pid}){RESET} "
|
||||
file_write = file.write
|
||||
|
||||
def write_with_prefix(s: str):
|
||||
if not s:
|
||||
return
|
||||
if file.start_new_line: # type: ignore[attr-defined]
|
||||
file_write(prefix)
|
||||
idx = 0
|
||||
while (next_idx := s.find("\n", idx)) != -1:
|
||||
next_idx += 1
|
||||
file_write(s[idx:next_idx])
|
||||
if next_idx == len(s):
|
||||
file.start_new_line = True # type: ignore[attr-defined]
|
||||
return
|
||||
file_write(prefix)
|
||||
idx = next_idx
|
||||
file_write(s[idx:])
|
||||
file.start_new_line = False # type: ignore[attr-defined]
|
||||
|
||||
file.start_new_line = True # type: ignore[attr-defined]
|
||||
file.write = write_with_prefix # type: ignore[method-assign]
|
||||
|
||||
|
||||
def decorate_logs(process_name: str | None = None) -> None:
|
||||
"""Decorate stdout/stderr with process name and PID prefix."""
|
||||
from vllm.utils import get_mp_context
|
||||
|
||||
if process_name is None:
|
||||
process_name = get_mp_context().current_process().name
|
||||
pid = os.getpid()
|
||||
_add_prefix(sys.stdout, process_name, pid)
|
||||
_add_prefix(sys.stderr, process_name, pid)
|
||||
@ -10,8 +10,9 @@ import zmq
|
||||
|
||||
from vllm.config import ParallelConfig
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils import get_mp_context, set_process_title
|
||||
from vllm.utils import get_mp_context
|
||||
from vllm.utils.network_utils import make_zmq_socket
|
||||
from vllm.utils.system_utils import set_process_title
|
||||
from vllm.v1.engine import EngineCoreOutputs, EngineCoreRequestType
|
||||
from vllm.v1.serial_utils import MsgpackDecoder
|
||||
from vllm.v1.utils import get_engine_client_zmq_addr, shutdown
|
||||
|
||||
@ -28,14 +28,11 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
from vllm.multimodal.cache import engine_receiver_cache_from_config
|
||||
from vllm.tasks import POOLING_TASKS, SupportedTask
|
||||
from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
|
||||
from vllm.utils import (
|
||||
decorate_logs,
|
||||
set_process_title,
|
||||
)
|
||||
from vllm.utils.gc_utils import maybe_attach_gc_debug_callback
|
||||
from vllm.utils.hashing import get_hash_fn_by_name
|
||||
from vllm.utils.import_utils import resolve_obj_by_qualname
|
||||
from vllm.utils.network_utils import make_zmq_socket
|
||||
from vllm.utils.system_utils import decorate_logs, set_process_title
|
||||
from vllm.v1.core.kv_cache_utils import (
|
||||
BlockHash,
|
||||
generate_scheduler_kv_cache_config,
|
||||
|
||||
@ -35,17 +35,13 @@ from vllm.distributed.parallel_state import (
|
||||
)
|
||||
from vllm.envs import enable_envs_cache
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils import (
|
||||
_maybe_force_spawn,
|
||||
decorate_logs,
|
||||
get_mp_context,
|
||||
set_process_title,
|
||||
)
|
||||
from vllm.utils import _maybe_force_spawn, get_mp_context
|
||||
from vllm.utils.network_utils import (
|
||||
get_distributed_init_method,
|
||||
get_loopback_ip,
|
||||
get_open_port,
|
||||
)
|
||||
from vllm.utils.system_utils import decorate_logs, set_process_title
|
||||
from vllm.v1.core.sched.output import SchedulerOutput
|
||||
from vllm.v1.executor.abstract import Executor, FailureCallback
|
||||
from vllm.v1.outputs import AsyncModelRunnerOutput, DraftTokenIds, ModelRunnerOutput
|
||||
|
||||
@ -16,10 +16,10 @@ from vllm.multimodal.cache import worker_receiver_cache_from_config
|
||||
from vllm.utils import (
|
||||
enable_trace_function_call_for_thread,
|
||||
run_method,
|
||||
update_environment_variables,
|
||||
warn_for_unimplemented_methods,
|
||||
)
|
||||
from vllm.utils.import_utils import resolve_obj_by_qualname
|
||||
from vllm.utils.system_utils import update_environment_variables
|
||||
from vllm.v1.kv_cache_interface import KVCacheSpec
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user