From 9f042ba26b59e1bfc9bef031165033fa931f3457 Mon Sep 17 00:00:00 2001 From: Jialin Ouyang Date: Wed, 10 Dec 2025 11:13:01 -0800 Subject: [PATCH] [Perf] Enable environment cache in EngineCore to enable the feature for UniProcExecutor as well (#29289) Signed-off-by: Jialin Ouyang --- tests/test_envs.py | 38 ++++++++++++++++++++++++++++++ vllm/distributed/parallel_state.py | 2 ++ vllm/envs.py | 20 ++++++++++++++++ vllm/v1/engine/core.py | 7 +++--- 4 files changed, 63 insertions(+), 4 deletions(-) diff --git a/tests/test_envs.py b/tests/test_envs.py index 11bbec38202bf..b6b7cf38d4abc 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -8,6 +8,7 @@ import pytest import vllm.envs as envs from vllm.envs import ( + disable_envs_cache, enable_envs_cache, env_list_with_choices, env_set_with_choices, @@ -57,6 +58,43 @@ def test_getattr_with_cache(monkeypatch: pytest.MonkeyPatch): envs.__getattr__ = envs.__getattr__.__wrapped__ +def test_getattr_with_reset(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("VLLM_HOST_IP", "1.1.1.1") + # __getattr__ is not decorated with functools.cache + assert not hasattr(envs.__getattr__, "cache_info") + + # Enable envs cache and ignore ongoing environment changes + enable_envs_cache() + assert envs.VLLM_HOST_IP == "1.1.1.1" + # With cache enabled, the environment variable value is cached and unchanged + monkeypatch.setenv("VLLM_HOST_IP", "2.2.2.2") + assert envs.VLLM_HOST_IP == "1.1.1.1" + + disable_envs_cache() + assert envs.VLLM_HOST_IP == "2.2.2.2" + # After cache disabled, the environment variable value would be synced + # with os.environ + monkeypatch.setenv("VLLM_HOST_IP", "3.3.3.3") + assert envs.VLLM_HOST_IP == "3.3.3.3" + + +def test_is_envs_cache_enabled() -> None: + assert not envs._is_envs_cache_enabled() + enable_envs_cache() + assert envs._is_envs_cache_enabled() + + # Only wrap one-layer of cache, so we only need to + # call disable once to reset. 
+ enable_envs_cache() + enable_envs_cache() + enable_envs_cache() + disable_envs_cache() + assert not envs._is_envs_cache_enabled() + + disable_envs_cache() + assert not envs._is_envs_cache_enabled() + + class TestEnvWithChoices: """Test cases for env_with_choices function.""" diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py index f910f10407d44..338cb1f1814b5 100644 --- a/vllm/distributed/parallel_state.py +++ b/vllm/distributed/parallel_state.py @@ -1586,6 +1586,8 @@ def destroy_distributed_environment(): def cleanup_dist_env_and_memory(shutdown_ray: bool = False): + # Reset environment variable cache + envs.disable_envs_cache() # Ensure all objects are not frozen before cleanup gc.unfreeze() diff --git a/vllm/envs.py b/vllm/envs.py index 8246109eb73af..230f2cf3450a9 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -1580,6 +1580,12 @@ def __getattr__(name: str): raise AttributeError(f"module {__name__!r} has no attribute {name!r}") +def _is_envs_cache_enabled() -> bool: + """Check if __getattr__ is wrapped with functools.cache""" + global __getattr__ + return hasattr(__getattr__, "cache_clear") + + def enable_envs_cache() -> None: """ Enables caching of environment variables. This is useful for performance @@ -1590,6 +1596,9 @@ def enable_envs_cache() -> None: runtime overhead. This also means that environment variables should NOT be updated after the service is initialized. """ + if _is_envs_cache_enabled(): + # Avoid wrapping functools.cache multiple times + return # Tag __getattr__ with functools.cache global __getattr__ __getattr__ = functools.cache(__getattr__) @@ -1599,6 +1608,17 @@ def enable_envs_cache() -> None: __getattr__(key) +def disable_envs_cache() -> None: + """ + Resets the environment variables cache. It could be used to isolate environments + between unit tests. + """ + global __getattr__ + # If __getattr__ is wrapped by functools.cache, unwrap the caching layer. 
+ if _is_envs_cache_enabled(): + __getattr__ = __getattr__.__wrapped__ + + def __dir__(): return list(environment_variables.keys()) diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 3d3a1e138ddef..0045b8c1dd3e7 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -211,6 +211,9 @@ class EngineCore: freeze_gc_heap() # If enable, attach GC debugger after static variable freeze. maybe_attach_gc_debug_callback() + # Enable environment variable cache (e.g. assume no more + # environment variable overrides after this point) + enable_envs_cache() def _initialize_kv_caches( self, vllm_config: VllmConfig @@ -672,10 +675,6 @@ class EngineCoreProc(EngineCore): assert addresses.coordinator_input is not None logger.info("Waiting for READY message from DP Coordinator...") - # Enable environment variable cache (e.g. assume no more - # environment variable overrides after this point) - enable_envs_cache() - @contextmanager def _perform_handshakes( self,