From 07ca70af8d8a0d0e20727d8de6972a7ad87cf996 Mon Sep 17 00:00:00 2001
From: Jialin Ouyang
Date: Tue, 14 Oct 2025 18:41:18 -0700
Subject: [PATCH] [Core][Easy] Unify to use envs.__getattr__ for all
 environment variable access (#26810)

Signed-off-by: Jialin Ouyang
---
 vllm/multimodal/cache.py         | 6 +++---
 vllm/transformers_utils/utils.py | 4 ++--
 vllm/utils/gc_utils.py           | 6 +++---
 vllm/v1/engine/async_llm.py      | 5 ++---
 4 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/vllm/multimodal/cache.py b/vllm/multimodal/cache.py
index f6ef675aa7c29..a29da2a56afc1 100644
--- a/vllm/multimodal/cache.py
+++ b/vllm/multimodal/cache.py
@@ -10,12 +10,12 @@ from typing import TYPE_CHECKING, Generic, TypeAlias, TypeVar, cast
 import torch
 from typing_extensions import override
 
+import vllm.envs as envs
 from vllm.distributed.device_communicators.shm_object_storage import (
     MsgpackSerde,
     SingleWriterShmObjectStorage,
     SingleWriterShmRingBuffer,
 )
-from vllm.envs import VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME
 from vllm.logger import init_logger
 from vllm.utils import GiB_bytes, MiB_bytes
 from vllm.utils.cache import CacheInfo, LRUCache
@@ -436,7 +436,7 @@ class ShmObjectStoreSenderCache(BaseMultiModalProcessorCache):
 
         ring_buffer = SingleWriterShmRingBuffer(
             data_buffer_size=int(mm_config.mm_processor_cache_gb * GiB_bytes),
-            name=VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME,
+            name=envs.VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME,
             create=True,  # sender is the writer
         )
         self._shm_cache = SingleWriterShmObjectStorage(
@@ -678,7 +678,7 @@ class ShmObjectStoreReceiverCache(BaseMultiModalReceiverCache):
 
         ring_buffer = SingleWriterShmRingBuffer(
             data_buffer_size=int(mm_config.mm_processor_cache_gb * GiB_bytes),
-            name=VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME,
+            name=envs.VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME,
             create=False,  # Server is a reader
         )
         self._shm_cache = SingleWriterShmObjectStorage(
diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py
index b87414d79df0f..58c754dbd3974 100644
--- a/vllm/transformers_utils/utils.py
+++ b/vllm/transformers_utils/utils.py
@@ -8,7 +8,7 @@ from os import PathLike
 from pathlib import Path
 from typing import Any
 
-from vllm.envs import VLLM_MODEL_REDIRECT_PATH
+import vllm.envs as envs
 from vllm.logger import init_logger
 
 logger = init_logger(__name__)
@@ -86,7 +86,7 @@ def maybe_model_redirect(model: str) -> str:
 
     :return: maybe redirect to a local folder
     """
-    model_redirect_path = VLLM_MODEL_REDIRECT_PATH
+    model_redirect_path = envs.VLLM_MODEL_REDIRECT_PATH
 
     if not model_redirect_path:
         return model
diff --git a/vllm/utils/gc_utils.py b/vllm/utils/gc_utils.py
index 99c19c9db28e9..6894ccff11d93 100644
--- a/vllm/utils/gc_utils.py
+++ b/vllm/utils/gc_utils.py
@@ -7,7 +7,7 @@ from collections import Counter
 from contextlib import suppress
 from typing import Any
 
-from vllm.envs import VLLM_GC_DEBUG
+import vllm.envs as envs
 from vllm.logger import init_logger
 
 logger = init_logger(__name__)
@@ -36,7 +36,7 @@ class GCDebugConfig:
             self.top_objects = json_conf.get("top_objects", -1)
         except Exception:
             self.enabled = False
-            logger.error("Failed to parse VLLM_GC_DEBUG(%s)", VLLM_GC_DEBUG)
+            logger.error("Failed to parse VLLM_GC_DEBUG(%s)", envs.VLLM_GC_DEBUG)
         logger.info("GC Debug Config. %s", str(self))
 
     def __repr__(self) -> str:
@@ -93,7 +93,7 @@ def maybe_attach_gc_debug_callback() -> None:
     """
    Attached a callback for GC debug when VLLM_GC_DEBUG is enabled.
     """
-    config = GCDebugConfig(VLLM_GC_DEBUG)
+    config = GCDebugConfig(envs.VLLM_GC_DEBUG)
     if config.enabled:
         debugger: GCDebugger = GCDebugger(config)
 
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index 39cd1d97c280a..0ec153e233161 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -16,7 +16,6 @@ from vllm.config import VllmConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.utils import _validate_truncation_size
-from vllm.envs import VLLM_V1_OUTPUT_PROC_CHUNK_SIZE
 from vllm.inputs import PromptType
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
@@ -483,12 +482,12 @@ class AsyncLLM(EngineClient):
             # Split outputs into chunks of at most
             # VLLM_V1_OUTPUT_PROC_CHUNK_SIZE, so that we don't block the
             # event loop for too long.
-            if num_outputs <= VLLM_V1_OUTPUT_PROC_CHUNK_SIZE:
+            if num_outputs <= envs.VLLM_V1_OUTPUT_PROC_CHUNK_SIZE:
                 slices = (outputs.outputs,)
             else:
                 slices = np.array_split(
                     outputs.outputs,
-                    cdiv(num_outputs, VLLM_V1_OUTPUT_PROC_CHUNK_SIZE),
+                    cdiv(num_outputs, envs.VLLM_V1_OUTPUT_PROC_CHUNK_SIZE),
                 )
 
             for i, outputs_slice in enumerate(slices):