From 8f18feb191d731dd4be000377a18ffb84ed8dc18 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Wed, 22 Oct 2025 10:18:17 +0100
Subject: [PATCH] Remove last `level` references not removed in #26355 (#27260)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 tests/compile/piecewise/test_toy_llama.py       | 4 ++--
 tests/compile/test_aot_compile.py               | 2 +-
 tests/compile/test_config.py                    | 6 +++---
 tests/compile/test_full_graph.py                | 2 +-
 tests/compile/test_fusions_e2e.py               | 6 +++---
 tests/model_executor/test_enabled_custom_ops.py | 6 +++---
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/compile/piecewise/test_toy_llama.py b/tests/compile/piecewise/test_toy_llama.py
index 175ca4a23043..6887673eb6a5 100644
--- a/tests/compile/piecewise/test_toy_llama.py
+++ b/tests/compile/piecewise/test_toy_llama.py
@@ -355,13 +355,13 @@ def test_toy_llama(
     )
 
     compile_config_no_compile = CompilationConfig(
-        level=CompilationMode.NONE,
+        mode=CompilationMode.NONE,
         cudagraph_mode=CUDAGraphMode.NONE,
         backend="eager",
     )
 
     compile_config_no_split = CompilationConfig(
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         use_inductor_graph_partition=use_inductor_graph_partition,
         cudagraph_mode=CUDAGraphMode.PIECEWISE,
         backend=backend,
diff --git a/tests/compile/test_aot_compile.py b/tests/compile/test_aot_compile.py
index b2734af575a1..c65e5a25934d 100644
--- a/tests/compile/test_aot_compile.py
+++ b/tests/compile/test_aot_compile.py
@@ -38,7 +38,7 @@ class CompiledMod(torch.nn.Module):
 def make_vllm_config() -> VllmConfig:
     return VllmConfig(
         compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
         )
     )
 
diff --git a/tests/compile/test_config.py b/tests/compile/test_config.py
index c6fe65ab5146..4f782ef92c55 100644
--- a/tests/compile/test_config.py
+++ b/tests/compile/test_config.py
@@ -168,7 +168,7 @@ def test_splitting_ops_dynamic():
     if is_torch_equal_or_newer("2.9.0.dev"):
         config = VllmConfig(
             compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                 use_inductor_graph_partition=True,
                 splitting_ops=["vllm::unified_attention"],
             )
@@ -180,7 +180,7 @@ def test_splitting_ops_dynamic():
     # When attn_fusion pass enabled, splitting_ops now default to attention ops.
     config = VllmConfig(
         compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
             pass_config={"enable_attn_fusion": True, "enable_noop": True},
             custom_ops=["+quant_fp8"],
             cudagraph_mode=CUDAGraphMode.PIECEWISE,
@@ -195,7 +195,7 @@ def test_splitting_ops_dynamic():
     if is_torch_equal_or_newer("2.9.0.dev"):
         config = VllmConfig(
             compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                 use_inductor_graph_partition=True,
                 pass_config={"enable_attn_fusion": True, "enable_noop": True},
                 custom_ops=["+quant_fp8"],
diff --git a/tests/compile/test_full_graph.py b/tests/compile/test_full_graph.py
index 7a4e859b3e6c..0ad8c17d8668 100644
--- a/tests/compile/test_full_graph.py
+++ b/tests/compile/test_full_graph.py
@@ -198,7 +198,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
     compilation_config = (
         compile_config
         if isinstance(compile_config, CompilationConfig)
-        else CompilationConfig(level=compile_config)
+        else CompilationConfig(mode=compile_config)
     )
 
     prompts = [
diff --git a/tests/compile/test_fusions_e2e.py b/tests/compile/test_fusions_e2e.py
index efb5774b7870..50271e2a4d70 100644
--- a/tests/compile/test_fusions_e2e.py
+++ b/tests/compile/test_fusions_e2e.py
@@ -151,7 +151,7 @@ def test_attn_quant(
         cudagraph_mode=mode,
         splitting_ops=splitting_ops,
         # Common
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         pass_config=PassConfig(enable_attn_fusion=True, enable_noop=True),
         # Inductor caches custom passes by default as well via uuid
         inductor_compile_config={"force_disable_caches": True},
@@ -236,7 +236,7 @@ def test_tp2_attn_quant_allreduce_rmsnorm(
         custom_ops=custom_ops_list,
         splitting_ops=splitting_ops,
         # Common
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         pass_config=PassConfig(
             enable_attn_fusion=True,
             enable_noop=True,
@@ -273,7 +273,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
     compilation_config = (
         compile_config
         if isinstance(compile_config, CompilationConfig)
-        else CompilationConfig(level=compile_config)
+        else CompilationConfig(mode=compile_config)
     )
 
     prompts = [
diff --git a/tests/model_executor/test_enabled_custom_ops.py b/tests/model_executor/test_enabled_custom_ops.py
index 254e9b3ab8af..41419553aa83 100644
--- a/tests/model_executor/test_enabled_custom_ops.py
+++ b/tests/model_executor/test_enabled_custom_ops.py
@@ -36,7 +36,7 @@ class Relu3(ReLUSquaredActivation):
 
 
 @pytest.mark.parametrize(
-    "env, torch_level, backend, ops_enabled, default_on",
+    "env, compilation_mode, backend, ops_enabled, default_on",
     [
         # Default values based on compile level
         # - All by default (no Inductor compilation)
@@ -77,7 +77,7 @@ class Relu3(ReLUSquaredActivation):
 )
 def test_enabled_ops(
     env: str | None,
-    torch_level: int,
+    compilation_mode: int,
     backend: str,
     ops_enabled: list[int],
     default_on: bool,
@@ -85,7 +85,7 @@ def test_enabled_ops(
     custom_ops = env.split(",") if env else []
     vllm_config = VllmConfig(
         compilation_config=CompilationConfig(
-            backend=backend, level=torch_level, custom_ops=custom_ops
+            backend=backend, mode=compilation_mode, custom_ops=custom_ops
        )
    )
    with set_current_vllm_config(vllm_config):
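
For downstream code, the net effect of this patch is the keyword rename shown below. This is a minimal sketch, not part of the patch itself; it assumes `CompilationConfig`, `CompilationMode`, and `VllmConfig` are importable from `vllm.config` (true for recent vLLM, but verify against your installed version):

    # Sketch: building a compilation config after the `level` -> `mode` rename.
    # Assumes these names live in vllm.config; adjust imports to your version.
    from vllm.config import CompilationConfig, CompilationMode, VllmConfig

    # Before #26355, callers passed `level=CompilationMode.VLLM_COMPILE`;
    # after this cleanup, the keyword is `mode=`.
    vllm_config = VllmConfig(
        compilation_config=CompilationConfig(
            mode=CompilationMode.VLLM_COMPILE,
        )
    )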