From 5f2f3fba1d9ed0aa433171b86c415a5f02055035 Mon Sep 17 00:00:00 2001
From: Zhengxu Chen <zhxchen17@fb.com>
Date: Wed, 17 Dec 2025 23:22:23 -0500
Subject: [PATCH] [compile] Fix CI for test_gpt2_cache_hit (#30902)

Signed-off-by: zhxchen17 <zhxchen17@fb.com>
---
 tests/compile/test_aot_compile.py | 11 ++++++++++-
 vllm/config/compilation.py        | 10 +++++-----
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/tests/compile/test_aot_compile.py b/tests/compile/test_aot_compile.py
index 8fa305d6d72f5..2ffcd627e476a 100644
--- a/tests/compile/test_aot_compile.py
+++ b/tests/compile/test_aot_compile.py
@@ -9,6 +9,7 @@ from contextlib import contextmanager
 import pytest
 import torch
 
+import vllm.model_executor.layers.activation
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import (
     CompilationConfig,
@@ -16,9 +17,12 @@ from vllm.config import (
     VllmConfig,
     set_current_vllm_config,
 )
+from vllm.envs import disable_envs_cache
 from vllm.forward_context import set_forward_context
 from vllm.utils.torch_utils import is_torch_equal_or_newer
 
+from ..utils import create_new_process_for_each_test
+
 
 def reference_fn(x: torch.Tensor):
     assert x.shape[0] <= 42
@@ -66,6 +70,7 @@ def test_no_dynamo_cache_entry(monkeypatch: pytest.MonkeyPatch):
                 torch.compiler.set_stance("fail_on_recompile"),
             ):
                 CompiledMod(vllm_config=vllm_config)(*args)
+            disable_envs_cache()
 
             m.setenv("VLLM_USE_AOT_COMPILE", "1")
             torch._dynamo.reset()
@@ -101,6 +106,7 @@ def test_save_and_load(monkeypatch: pytest.MonkeyPatch):
             vllm_config = make_vllm_config()
             with use_vllm_config(vllm_config):
                 expected = CompiledMod(vllm_config=vllm_config)(*args)
+            disable_envs_cache()
 
             m.setenv("VLLM_FORCE_AOT_LOAD", "1")
             vllm_config = make_vllm_config()
@@ -130,6 +136,7 @@ def test_shape_env(monkeypatch: pytest.MonkeyPatch):
                 artifacts = compiled_mod.aot_compiled_fn._artifacts
                 guards_string = artifacts.compiled_fn.shape_env.format_guards()
                 assert guards_string == " - s77 <= 42\n - Eq(Mod(s77, 2), 0)"
+            disable_envs_cache()
 
             m.setenv("VLLM_FORCE_AOT_LOAD", "1")
             vllm_config = make_vllm_config()
@@ -144,7 +151,7 @@ def test_shape_env(monkeypatch: pytest.MonkeyPatch):
 @pytest.mark.skipif(
     not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
 )
-@use_vllm_config(make_vllm_config())
+@create_new_process_for_each_test("spawn")
 def test_gpt2_cache_hit(monkeypatch: pytest.MonkeyPatch):
     """
     Test that compiling gpt2 twice results in a cache hit and
@@ -186,6 +193,8 @@ def test_gpt2_cache_hit(monkeypatch: pytest.MonkeyPatch):
 
             # Clean up first model
             del llm_model
+            disable_envs_cache()
+            vllm.model_executor.layers.activation._ACTIVATION_REGISTRY._dict.clear()
 
             # Second compilation - should hit cache
             m.setenv("VLLM_FORCE_AOT_LOAD", "1")
diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py
index 4a98494b3c7b3..3e3ee1e572ec8 100644
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -437,14 +437,14 @@ class CompilationConfig:
 
     compile_ranges_split_points: list[int] | None = None
     """Split points that represent compile ranges for inductor.
-    The compile ranges are 
-    [1, split_points[0]], 
-    [split_points[0] + 1, split_points[1]], ..., 
+    The compile ranges are
+    [1, split_points[0]],
+    [split_points[0] + 1, split_points[1]], ...,
     [split_points[-1] + 1, max_num_batched_tokens].
     Compile sizes are also used single element ranges,
     the range is represented as [compile_sizes[i], compile_sizes[i]].
-    
-    If a range overlaps with the compile size, graph for compile size 
+
+    If a range overlaps with the compile size, graph for compile size
     will be prioritized, i.e. if we have a range [1, 8] and a compile size 4,
     graph for compile size 4 will be compiled and used instead of the graph
     for range [1, 8].