Remove last level references not removed in #26355 (#27260)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2026-03-16 13:57:12 +08:00 · 2025-10-22 10:18:17 +01:00 · 2025-10-22 10:18:17 +01:00 · 8f18feb191
commit 8f18feb191
parent ed540d6d4c
6 changed files with 13 additions and 13 deletions
--- a/tests/compile/piecewise/test_toy_llama.py
+++ b/tests/compile/piecewise/test_toy_llama.py
@ -355,13 +355,13 @@ def test_toy_llama(
    )

    compile_config_no_compile = CompilationConfig(
-        level=CompilationMode.NONE,
+        mode=CompilationMode.NONE,
        cudagraph_mode=CUDAGraphMode.NONE,
        backend="eager",
    )

    compile_config_no_split = CompilationConfig(
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
        use_inductor_graph_partition=use_inductor_graph_partition,
        cudagraph_mode=CUDAGraphMode.PIECEWISE,
        backend=backend,
--- a/tests/compile/test_aot_compile.py
+++ b/tests/compile/test_aot_compile.py
@ -38,7 +38,7 @@ class CompiledMod(torch.nn.Module):
 def make_vllm_config() -> VllmConfig:
    return VllmConfig(
        compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
        )
    )

--- a/tests/compile/test_config.py
+++ b/tests/compile/test_config.py
@ -168,7 +168,7 @@ def test_splitting_ops_dynamic():
    if is_torch_equal_or_newer("2.9.0.dev"):
        config = VllmConfig(
            compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                use_inductor_graph_partition=True,
                splitting_ops=["vllm::unified_attention"],
            )
@ -180,7 +180,7 @@ def test_splitting_ops_dynamic():
    # When attn_fusion pass enabled, splitting_ops now default to attention ops.
    config = VllmConfig(
        compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
            pass_config={"enable_attn_fusion": True, "enable_noop": True},
            custom_ops=["+quant_fp8"],
            cudagraph_mode=CUDAGraphMode.PIECEWISE,
@ -195,7 +195,7 @@ def test_splitting_ops_dynamic():
    if is_torch_equal_or_newer("2.9.0.dev"):
        config = VllmConfig(
            compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                use_inductor_graph_partition=True,
                pass_config={"enable_attn_fusion": True, "enable_noop": True},
                custom_ops=["+quant_fp8"],
--- a/tests/compile/test_full_graph.py
+++ b/tests/compile/test_full_graph.py
@ -198,7 +198,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
    compilation_config = (
        compile_config
        if isinstance(compile_config, CompilationConfig)
-        else CompilationConfig(level=compile_config)
+        else CompilationConfig(mode=compile_config)
    )

    prompts = [
--- a/tests/compile/test_fusions_e2e.py
+++ b/tests/compile/test_fusions_e2e.py
@ -151,7 +151,7 @@ def test_attn_quant(
        cudagraph_mode=mode,
        splitting_ops=splitting_ops,
        # Common
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
        pass_config=PassConfig(enable_attn_fusion=True, enable_noop=True),
        # Inductor caches custom passes by default as well via uuid
        inductor_compile_config={"force_disable_caches": True},
@ -236,7 +236,7 @@ def test_tp2_attn_quant_allreduce_rmsnorm(
        custom_ops=custom_ops_list,
        splitting_ops=splitting_ops,
        # Common
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
        pass_config=PassConfig(
            enable_attn_fusion=True,
            enable_noop=True,
@ -273,7 +273,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
    compilation_config = (
        compile_config
        if isinstance(compile_config, CompilationConfig)
-        else CompilationConfig(level=compile_config)
+        else CompilationConfig(mode=compile_config)
    )

    prompts = [
--- a/tests/model_executor/test_enabled_custom_ops.py
+++ b/tests/model_executor/test_enabled_custom_ops.py
@ -36,7 +36,7 @@ class Relu3(ReLUSquaredActivation):


@pytest.mark.parametrize(
-    "env, torch_level, backend, ops_enabled, default_on",
+    "env, compilation_mode, backend, ops_enabled, default_on",
    [
        # Default values based on compile level
        # - All by default (no Inductor compilation)
@ -77,7 +77,7 @@ class Relu3(ReLUSquaredActivation):
 )
 def test_enabled_ops(
    env: str | None,
-    torch_level: int,
+    compilation_mode: int,
    backend: str,
    ops_enabled: list[int],
    default_on: bool,
@ -85,7 +85,7 @@ def test_enabled_ops(
    custom_ops = env.split(",") if env else []
    vllm_config = VllmConfig(
        compilation_config=CompilationConfig(
-            backend=backend, level=torch_level, custom_ops=custom_ops
+            backend=backend, mode=compilation_mode, custom_ops=custom_ops
        )
    )
    with set_current_vllm_config(vllm_config):