Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Perf] Cache vllm.env.__getattr__ result to avoid recomputation (#26146)
Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
Parent: b92ab3deda
Commit: 380f17527c
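
The change wraps the module-level __getattr__ hook in vllm/envs.py with functools.cache once initialization is done, so each environment variable is read and parsed at most once. The micro-benchmark below only illustrates that pattern; the lookup helper and the EXAMPLE_PORT variable are made up for the example and are not part of the commit, and absolute numbers will vary by machine.

    # Illustrative micro-benchmark: repeated lazy evaluation of an environment
    # variable vs. a functools.cache-wrapped lookup, which is the pattern this
    # commit applies to vllm.envs.__getattr__. Names here are hypothetical.
    import functools
    import os
    import timeit

    os.environ["EXAMPLE_PORT"] = "1234"
    getters = {"EXAMPLE_PORT": lambda: int(os.environ["EXAMPLE_PORT"])}


    def lookup(name: str) -> int:
        # Re-reads and re-parses the environment variable on every call.
        return getters[name]()


    cached_lookup = functools.cache(lookup)

    print("uncached:", timeit.timeit(lambda: lookup("EXAMPLE_PORT"), number=100_000))
    print("cached:  ", timeit.timeit(lambda: cached_lookup("EXAMPLE_PORT"), number=100_000))
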
@@ -6,7 +6,54 @@ from unittest.mock import patch
 
 import pytest
 
-from vllm.envs import env_list_with_choices, env_with_choices
+import vllm.envs as envs
+from vllm.envs import (
+    enable_envs_cache,
+    env_list_with_choices,
+    env_with_choices,
+    environment_variables,
+)
+
+
+def test_getattr_without_cache(monkeypatch: pytest.MonkeyPatch):
+    assert envs.VLLM_HOST_IP == ""
+    assert envs.VLLM_PORT is None
+    monkeypatch.setenv("VLLM_HOST_IP", "1.1.1.1")
+    monkeypatch.setenv("VLLM_PORT", "1234")
+    assert envs.VLLM_HOST_IP == "1.1.1.1"
+    assert envs.VLLM_PORT == 1234
+    # __getattr__ is not decorated with functools.cache
+    assert not hasattr(envs.__getattr__, "cache_info")
+
+
+def test_getattr_with_cache(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.setenv("VLLM_HOST_IP", "1.1.1.1")
+    monkeypatch.setenv("VLLM_PORT", "1234")
+    # __getattr__ is not yet decorated with functools.cache
+    assert not hasattr(envs.__getattr__, "cache_info")
+
+    # Enable the envs cache; environment changes after this point are ignored
+    enable_envs_cache()
+
+    # __getattr__ is now decorated with functools.cache
+    assert hasattr(envs.__getattr__, "cache_info")
+    start_hits = envs.__getattr__.cache_info().hits
+
+    # 2 more hits due to the VLLM_HOST_IP and VLLM_PORT accesses
+    assert envs.VLLM_HOST_IP == "1.1.1.1"
+    assert envs.VLLM_PORT == 1234
+    assert envs.__getattr__.cache_info().hits == start_hits + 2
+
+    # All environment variables are cached
+    for environment_variable in environment_variables:
+        envs.__getattr__(environment_variable)
+    assert envs.__getattr__.cache_info().hits == start_hits + 2 + len(
+        environment_variables
+    )
+
+    # Reset envs.__getattr__ back to the non-cached version to
+    # avoid affecting other tests
+    envs.__getattr__ = envs.__getattr__.__wrapped__
 
 
 class TestEnvWithChoices:
vllm/envs.py (27 changed lines: +26, -1)
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import functools
 import hashlib
 import json
 import os
@@ -1408,12 +1409,36 @@ environment_variables: dict[str, Callable[[], Any]] = {
 
 
 def __getattr__(name: str):
-    # lazy evaluation of environment variables
+    """
+    Gets environment variables lazily.
+
+    NOTE: After enable_envs_cache() is invoked (which happens after service
+    initialization), all environment variables are cached.
+    """
     if name in environment_variables:
         return environment_variables[name]()
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
 
+def enable_envs_cache() -> None:
+    """
+    Enables caching of environment variables. This is useful for performance
+    reasons, as it avoids the need to re-evaluate environment variables on
+    every call.
+
+    NOTE: Currently, it is invoked after service initialization to reduce
+    runtime overhead. This also means that environment variables should NOT
+    be updated after the service is initialized.
+    """
+    # Wrap __getattr__ with functools.cache
+    global __getattr__
+    __getattr__ = functools.cache(__getattr__)
+
+    # Cache all environment variables eagerly
+    for key in environment_variables:
+        __getattr__(key)
+
+
 def __dir__():
     return list(environment_variables.keys())
 
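
The new tests above rely only on standard functools.cache introspection: the wrapper exposes cache_info() for hit/miss counters and __wrapped__ for recovering the original, uncached function. A minimal standalone sketch of that behavior (no vLLM imports; compute is a made-up function):

    import functools


    def compute(name: str) -> str:
        # Stand-in for an expensive per-access computation.
        return name.upper()


    cached = functools.cache(compute)
    cached("vllm_port")
    cached("vllm_port")
    print(cached.cache_info())             # CacheInfo(hits=1, misses=1, maxsize=None, currsize=1)
    assert cached.__wrapped__ is compute   # how the test restores the uncached version
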
@@ -20,6 +20,7 @@ import zmq
 from vllm.config import ParallelConfig, VllmConfig
 from vllm.distributed import stateless_destroy_torch_distributed_process_group
 from vllm.distributed.parallel_state import is_global_first_rank
+from vllm.envs import enable_envs_cache
 from vllm.logger import init_logger
 from vllm.logging_utils.dump_input import dump_engine_exception
 from vllm.lora.request import LoRARequest
@@ -601,6 +602,10 @@ class EngineCoreProc(EngineCore):
         # If enabled, attach the GC debugger after the static variable freeze.
         maybe_attach_gc_debug_callback()
 
+        # Enable the environment variable cache (i.e. assume no more
+        # environment variable overrides after this point)
+        enable_envs_cache()
+
     @contextmanager
     def _perform_handshakes(
         self,
@@ -33,6 +33,7 @@ from vllm.distributed.parallel_state import (
     get_pp_group,
     get_tp_group,
 )
+from vllm.envs import enable_envs_cache
 from vllm.logger import init_logger
 from vllm.utils import (
     _maybe_force_spawn,
@@ -455,6 +456,10 @@ class WorkerProc:
         # Load model
         self.worker.load_model()
 
+        # Enable the environment variable cache (i.e. assume no more
+        # environment variable overrides after this point)
+        enable_envs_cache()
+
     @staticmethod
     def make_worker_process(
         vllm_config: VllmConfig,
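
Because both EngineCoreProc and WorkerProc only enable the cache after initialization is complete, environment overrides made afterwards are silently ignored. A hedged usage sketch of that caveat, assuming a vLLM installation that includes this commit:

    import os

    import vllm.envs as envs
    from vllm.envs import enable_envs_cache

    os.environ["VLLM_PORT"] = "1234"
    enable_envs_cache()
    assert envs.VLLM_PORT == 1234

    os.environ["VLLM_PORT"] = "5678"  # too late: the cached value is still returned
    assert envs.VLLM_PORT == 1234
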