mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-23 23:14:30 +08:00
[Core][Easy] Unify to envs.__getattr__ for all environment variable access (#26810)
Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
This commit is contained in:
parent
2dcd12d357
commit
07ca70af8d
@ -10,12 +10,12 @@ from typing import TYPE_CHECKING, Generic, TypeAlias, TypeVar, cast
|
||||
import torch
|
||||
from typing_extensions import override
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.distributed.device_communicators.shm_object_storage import (
|
||||
MsgpackSerde,
|
||||
SingleWriterShmObjectStorage,
|
||||
SingleWriterShmRingBuffer,
|
||||
)
|
||||
from vllm.envs import VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils import GiB_bytes, MiB_bytes
|
||||
from vllm.utils.cache import CacheInfo, LRUCache
|
||||
@ -436,7 +436,7 @@ class ShmObjectStoreSenderCache(BaseMultiModalProcessorCache):
|
||||
|
||||
ring_buffer = SingleWriterShmRingBuffer(
|
||||
data_buffer_size=int(mm_config.mm_processor_cache_gb * GiB_bytes),
|
||||
name=VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME,
|
||||
name=envs.VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME,
|
||||
create=True, # sender is the writer
|
||||
)
|
||||
self._shm_cache = SingleWriterShmObjectStorage(
|
||||
@ -678,7 +678,7 @@ class ShmObjectStoreReceiverCache(BaseMultiModalReceiverCache):
|
||||
|
||||
ring_buffer = SingleWriterShmRingBuffer(
|
||||
data_buffer_size=int(mm_config.mm_processor_cache_gb * GiB_bytes),
|
||||
name=VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME,
|
||||
name=envs.VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME,
|
||||
create=False, # Server is a reader
|
||||
)
|
||||
self._shm_cache = SingleWriterShmObjectStorage(
|
||||
|
||||
@ -8,7 +8,7 @@ from os import PathLike
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from vllm.envs import VLLM_MODEL_REDIRECT_PATH
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
@ -86,7 +86,7 @@ def maybe_model_redirect(model: str) -> str:
|
||||
:return: maybe redirect to a local folder
|
||||
"""
|
||||
|
||||
model_redirect_path = VLLM_MODEL_REDIRECT_PATH
|
||||
model_redirect_path = envs.VLLM_MODEL_REDIRECT_PATH
|
||||
|
||||
if not model_redirect_path:
|
||||
return model
|
||||
|
||||
@ -7,7 +7,7 @@ from collections import Counter
|
||||
from contextlib import suppress
|
||||
from typing import Any
|
||||
|
||||
from vllm.envs import VLLM_GC_DEBUG
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
@ -36,7 +36,7 @@ class GCDebugConfig:
|
||||
self.top_objects = json_conf.get("top_objects", -1)
|
||||
except Exception:
|
||||
self.enabled = False
|
||||
logger.error("Failed to parse VLLM_GC_DEBUG(%s)", VLLM_GC_DEBUG)
|
||||
logger.error("Failed to parse VLLM_GC_DEBUG(%s)", envs.VLLM_GC_DEBUG)
|
||||
logger.info("GC Debug Config. %s", str(self))
|
||||
|
||||
def __repr__(self) -> str:
|
||||
@ -93,7 +93,7 @@ def maybe_attach_gc_debug_callback() -> None:
|
||||
"""
|
||||
Attached a callback for GC debug when VLLM_GC_DEBUG is enabled.
|
||||
"""
|
||||
config = GCDebugConfig(VLLM_GC_DEBUG)
|
||||
config = GCDebugConfig(envs.VLLM_GC_DEBUG)
|
||||
if config.enabled:
|
||||
debugger: GCDebugger = GCDebugger(config)
|
||||
|
||||
|
||||
@ -16,7 +16,6 @@ from vllm.config import VllmConfig
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||
from vllm.engine.protocol import EngineClient
|
||||
from vllm.entrypoints.utils import _validate_truncation_size
|
||||
from vllm.envs import VLLM_V1_OUTPUT_PROC_CHUNK_SIZE
|
||||
from vllm.inputs import PromptType
|
||||
from vllm.logger import init_logger
|
||||
from vllm.lora.request import LoRARequest
|
||||
@ -483,12 +482,12 @@ class AsyncLLM(EngineClient):
|
||||
# Split outputs into chunks of at most
|
||||
# VLLM_V1_OUTPUT_PROC_CHUNK_SIZE, so that we don't block the
|
||||
# event loop for too long.
|
||||
if num_outputs <= VLLM_V1_OUTPUT_PROC_CHUNK_SIZE:
|
||||
if num_outputs <= envs.VLLM_V1_OUTPUT_PROC_CHUNK_SIZE:
|
||||
slices = (outputs.outputs,)
|
||||
else:
|
||||
slices = np.array_split(
|
||||
outputs.outputs,
|
||||
cdiv(num_outputs, VLLM_V1_OUTPUT_PROC_CHUNK_SIZE),
|
||||
cdiv(num_outputs, envs.VLLM_V1_OUTPUT_PROC_CHUNK_SIZE),
|
||||
)
|
||||
|
||||
for i, outputs_slice in enumerate(slices):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user