Remove last level references not removed in #26355 (#27260)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor, 2025-10-22 10:18:17 +01:00 (committed by GitHub)
parent ed540d6d4c
commit 8f18feb191
6 changed files with 13 additions and 13 deletions
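The change is mechanical: every remaining CompilationConfig(level=...) becomes CompilationConfig(mode=...), finishing the rename started in #26355. A minimal sketch of the post-rename construction, mirroring the diffs below (the import path is an assumption, not taken from this commit; adjust to wherever these classes live in your tree):

# Minimal sketch of the renamed keyword; import path is assumed.
from vllm.config import CompilationConfig, CompilationMode, VllmConfig

vllm_config = VllmConfig(
    compilation_config=CompilationConfig(
        # `mode` replaces the removed `level` keyword
        mode=CompilationMode.VLLM_COMPILE,
    )
)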


@@ -355,13 +355,13 @@ def test_toy_llama(
     )
     compile_config_no_compile = CompilationConfig(
-        level=CompilationMode.NONE,
+        mode=CompilationMode.NONE,
         cudagraph_mode=CUDAGraphMode.NONE,
         backend="eager",
     )
     compile_config_no_split = CompilationConfig(
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         use_inductor_graph_partition=use_inductor_graph_partition,
         cudagraph_mode=CUDAGraphMode.PIECEWISE,
         backend=backend,


@@ -38,7 +38,7 @@ class CompiledMod(torch.nn.Module):
 def make_vllm_config() -> VllmConfig:
     return VllmConfig(
         compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
         )
     )


@@ -168,7 +168,7 @@ def test_splitting_ops_dynamic():
     if is_torch_equal_or_newer("2.9.0.dev"):
         config = VllmConfig(
             compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                 use_inductor_graph_partition=True,
                 splitting_ops=["vllm::unified_attention"],
             )
@@ -180,7 +180,7 @@ def test_splitting_ops_dynamic():
     # When attn_fusion pass enabled, splitting_ops now default to attention ops.
     config = VllmConfig(
         compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
             pass_config={"enable_attn_fusion": True, "enable_noop": True},
             custom_ops=["+quant_fp8"],
             cudagraph_mode=CUDAGraphMode.PIECEWISE,
@@ -195,7 +195,7 @@ def test_splitting_ops_dynamic():
     if is_torch_equal_or_newer("2.9.0.dev"):
         config = VllmConfig(
             compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                 use_inductor_graph_partition=True,
                 pass_config={"enable_attn_fusion": True, "enable_noop": True},
                 custom_ops=["+quant_fp8"],


@@ -198,7 +198,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
     compilation_config = (
         compile_config
         if isinstance(compile_config, CompilationConfig)
-        else CompilationConfig(level=compile_config)
+        else CompilationConfig(mode=compile_config)
     )
     prompts = [


@@ -151,7 +151,7 @@ def test_attn_quant(
         cudagraph_mode=mode,
         splitting_ops=splitting_ops,
         # Common
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         pass_config=PassConfig(enable_attn_fusion=True, enable_noop=True),
         # Inductor caches custom passes by default as well via uuid
         inductor_compile_config={"force_disable_caches": True},
@@ -236,7 +236,7 @@ def test_tp2_attn_quant_allreduce_rmsnorm(
         custom_ops=custom_ops_list,
         splitting_ops=splitting_ops,
         # Common
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         pass_config=PassConfig(
             enable_attn_fusion=True,
             enable_noop=True,
@@ -273,7 +273,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
     compilation_config = (
         compile_config
         if isinstance(compile_config, CompilationConfig)
-        else CompilationConfig(level=compile_config)
+        else CompilationConfig(mode=compile_config)
     )
     prompts = [


@@ -36,7 +36,7 @@ class Relu3(ReLUSquaredActivation):
 @pytest.mark.parametrize(
-    "env, torch_level, backend, ops_enabled, default_on",
+    "env, compilation_mode, backend, ops_enabled, default_on",
     [
         # Default values based on compile level
         # - All by default (no Inductor compilation)
@@ -77,7 +77,7 @@ class Relu3(ReLUSquaredActivation):
 )
 def test_enabled_ops(
     env: str | None,
-    torch_level: int,
+    compilation_mode: int,
     backend: str,
     ops_enabled: list[int],
     default_on: bool,
@@ -85,7 +85,7 @@ def test_enabled_ops(
     custom_ops = env.split(",") if env else []
     vllm_config = VllmConfig(
         compilation_config=CompilationConfig(
-            backend=backend, level=torch_level, custom_ops=custom_ops
+            backend=backend, mode=compilation_mode, custom_ops=custom_ops
         )
     )
     with set_current_vllm_config(vllm_config):