From 5f2f3fba1d9ed0aa433171b86c415a5f02055035 Mon Sep 17 00:00:00 2001 From: Zhengxu Chen Date: Wed, 17 Dec 2025 23:22:23 -0500 Subject: [PATCH] [compile] Fix CI for test_gpt2_cache_hit (#30902) Signed-off-by: zhxchen17 --- tests/compile/test_aot_compile.py | 11 ++++++++++- vllm/config/compilation.py | 10 +++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/compile/test_aot_compile.py b/tests/compile/test_aot_compile.py index 8fa305d6d72f5..2ffcd627e476a 100644 --- a/tests/compile/test_aot_compile.py +++ b/tests/compile/test_aot_compile.py @@ -9,6 +9,7 @@ from contextlib import contextmanager import pytest import torch +import vllm.model_executor.layers.activation from vllm.compilation.decorators import support_torch_compile from vllm.config import ( CompilationConfig, @@ -16,9 +17,12 @@ from vllm.config import ( VllmConfig, set_current_vllm_config, ) +from vllm.envs import disable_envs_cache from vllm.forward_context import set_forward_context from vllm.utils.torch_utils import is_torch_equal_or_newer +from ..utils import create_new_process_for_each_test + def reference_fn(x: torch.Tensor): assert x.shape[0] <= 42 @@ -66,6 +70,7 @@ def test_no_dynamo_cache_entry(monkeypatch: pytest.MonkeyPatch): torch.compiler.set_stance("fail_on_recompile"), ): CompiledMod(vllm_config=vllm_config)(*args) + disable_envs_cache() m.setenv("VLLM_USE_AOT_COMPILE", "1") torch._dynamo.reset() @@ -101,6 +106,7 @@ def test_save_and_load(monkeypatch: pytest.MonkeyPatch): vllm_config = make_vllm_config() with use_vllm_config(vllm_config): expected = CompiledMod(vllm_config=vllm_config)(*args) + disable_envs_cache() m.setenv("VLLM_FORCE_AOT_LOAD", "1") vllm_config = make_vllm_config() @@ -130,6 +136,7 @@ def test_shape_env(monkeypatch: pytest.MonkeyPatch): artifacts = compiled_mod.aot_compiled_fn._artifacts guards_string = artifacts.compiled_fn.shape_env.format_guards() assert guards_string == " - s77 <= 42\n - Eq(Mod(s77, 2), 0)" + disable_envs_cache() m.setenv("VLLM_FORCE_AOT_LOAD", "1") vllm_config = make_vllm_config() @@ -144,7 +151,7 @@ def test_shape_env(monkeypatch: pytest.MonkeyPatch): @pytest.mark.skipif( not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10" ) -@use_vllm_config(make_vllm_config()) +@create_new_process_for_each_test("spawn") def test_gpt2_cache_hit(monkeypatch: pytest.MonkeyPatch): """ Test that compiling gpt2 twice results in a cache hit and @@ -186,6 +193,8 @@ def test_gpt2_cache_hit(monkeypatch: pytest.MonkeyPatch): # Clean up first model del llm_model + disable_envs_cache() + vllm.model_executor.layers.activation._ACTIVATION_REGISTRY._dict.clear() # Second compilation - should hit cache m.setenv("VLLM_FORCE_AOT_LOAD", "1") diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 4a98494b3c7b3..3e3ee1e572ec8 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -437,14 +437,14 @@ class CompilationConfig: compile_ranges_split_points: list[int] | None = None """Split points that represent compile ranges for inductor. - The compile ranges are - [1, split_points[0]], - [split_points[0] + 1, split_points[1]], ..., + The compile ranges are + [1, split_points[0]], + [split_points[0] + 1, split_points[1]], ..., [split_points[-1] + 1, max_num_batched_tokens]. Compile sizes are also used single element ranges, the range is represented as [compile_sizes[i], compile_sizes[i]]. - - If a range overlaps with the compile size, graph for compile size + + If a range overlaps with the compile size, graph for compile size will be prioritized, i.e. if we have a range [1, 8] and a compile size 4, graph for compile size 4 will be compiled and used instead of the graph for range [1, 8].