Scheduled removal of CompilationConfig.use_inductor (#29323)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2026-06-12 04:17:13 +08:00 · 2025-11-25 12:55:42 +00:00 · 2025-11-25 12:55:42 +00:00 · 51fc9e017a
commit 51fc9e017a
parent bf0c75cd4f
4 changed files with 13 additions and 41 deletions
--- a/tests/compile/fullgraph/test_simple.py
+++ b/tests/compile/fullgraph/test_simple.py
@ -55,7 +55,7 @@ class SillyModel(nn.Module):
 def _run_simple_model(
    splitting_ops,
    use_inductor_graph_partition,
-    use_inductor,
+    backend,
    expected_num_piecewise_graphs_seen,
    expected_num_piecewise_capturable_graphs_seen,
    expected_num_backend_compilations,
@ -64,7 +64,7 @@ def _run_simple_model(
    vllm_config = VllmConfig(
        compilation_config=CompilationConfig(
            mode=CompilationMode.VLLM_COMPILE,
-            use_inductor=use_inductor,
+            backend=backend,
            splitting_ops=splitting_ops,
            use_inductor_graph_partition=use_inductor_graph_partition,
            cudagraph_copy_inputs=True,
@ -124,14 +124,14 @@ def _run_simple_model(
        assert torch.allclose(output.cpu(), torch.tensor([19.0, 19.0]))
-@pytest.mark.parametrize("use_inductor", [True, False])
+@pytest.mark.parametrize("backend", ["inductor", "eager"])
@torch.inference_mode()
@create_new_process_for_each_test("spawn")
-def test_simple_piecewise_compile(use_inductor):
+def test_simple_piecewise_compile(backend):
    _run_simple_model(
        splitting_ops=["silly::attention"],
        use_inductor_graph_partition=False,
-        use_inductor=use_inductor,
+        backend=backend,
        # 2 * num_layers + 1
        expected_num_piecewise_graphs_seen=5,
        # 1 + num_layers
@ -155,7 +155,7 @@ def test_simple_inductor_graph_partition(monkeypatch):
    _run_simple_model(
        splitting_ops=["silly::attention"],
        use_inductor_graph_partition=True,
-        use_inductor=True,
+        backend="inductor",
        # Since not splitting at fx graph level
        expected_num_piecewise_graphs_seen=1,
        # Since not splitting at fx graph level
--- a/tests/engine/test_arg_utils.py
+++ b/tests/engine/test_arg_utils.py
@ -249,14 +249,13 @@ def test_compilation_config():
    args = parser.parse_args(
        [
            "-O",
-            '{"mode": 3, "cudagraph_capture_sizes": [1, 2, 4, 8], '
+            '{"mode": 3, "cudagraph_capture_sizes": [1, 2, 4, 8], "backend": "eager"}',
-            '"use_inductor": false}',
        ]
    )
    assert (
        args.compilation_config.mode == 3
        and args.compilation_config.cudagraph_capture_sizes == [1, 2, 4, 8]
-        and not args.compilation_config.use_inductor
+        and args.compilation_config.backend == "eager"
    )
    # set to string form of a dict
@ -264,13 +263,13 @@ def test_compilation_config():
        [
            "--compilation-config="
            '{"mode": 3, "cudagraph_capture_sizes": [1, 2, 4, 8], '
-            '"use_inductor": true}',
+            '"backend": "inductor"}',
        ]
    )
    assert (
        args.compilation_config.mode == 3
        and args.compilation_config.cudagraph_capture_sizes == [1, 2, 4, 8]
-        and args.compilation_config.use_inductor
+        and args.compilation_config.backend == "inductor"
    )
--- a/tests/utils_/test_argparse_utils.py
+++ b/tests/utils_/test_argparse_utils.py
@ -166,7 +166,7 @@ def test_dict_args(parser):
        "--hf-overrides.key2.key4",
        "val3",
        # Test compile config and compilation mode
-        "-O.use_inductor=true",
+        "-O.use_inductor_graph_partition=true",
        "-O.backend",
        "custom",
        "-O1",
@ -219,7 +219,7 @@ def test_dict_args(parser):
    }
    assert parsed_args.compilation_config == {
        "mode": 1,
-        "use_inductor": True,
+        "use_inductor_graph_partition": True,
        "backend": "custom",
        "custom_ops": ["-quant_fp8", "+silu_mul", "-rms_norm"],
    }
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@ -264,7 +264,6 @@ class CompilationConfig:
        - [`cudagraph_copy_inputs`]
        [vllm.config.CompilationConfig.cudagraph_copy_inputs]
    - Inductor compilation:
-        - [`use_inductor`][vllm.config.CompilationConfig.use_inductor]
        - [`compile_sizes`][vllm.config.CompilationConfig.compile_sizes]
        - [`inductor_compile_config`]
        [vllm.config.CompilationConfig.inductor_compile_config]
@ -348,7 +347,7 @@ class CompilationConfig:
    - 'none,+op1,+op2' to enable only op1 and op2
    By default, all custom ops are enabled when running without Inductor and
-    disabled when running with Inductor: mode>=VLLM_COMPILE and use_inductor=True.
+    disabled when running with Inductor: mode>=VLLM_COMPILE and backend="inductor".
    Inductor generates (fused) Triton kernels for disabled custom ops."""
    splitting_ops: list[str] | None = None
    """A list of ops to exclude from cudagraphs, used in piecewise compilation.
@ -374,24 +373,6 @@ class CompilationConfig:
    Disabled by default until more models are supported/tested to work."""
    # Inductor capture
-    use_inductor: bool | None = None
-    """
-    Whether to use inductor compilation.
-    This flag is deprecated and will be removed in the next release 0.12.0.
-    Please use the 'backend' option instead.
-    - False: inductor compilation is not used. graph runs in eager
-        (custom_ops enabled by default).
-    - True: inductor compilation is used (custom_ops disabled by default).
-        One graph for symbolic shape and one graph per size in compile_sizes
-        are compiled using configurations in inductor_compile_config.
-    This setting is ignored if mode<VLLM_COMPILE.
-    For future compatibility:
-    If use_inductor is True, backend="inductor" otherwise backend="eager".
-    """
    compile_sizes: list[int | str] | None = None
    """Sizes to compile for inductor. In addition
    to integers, it also supports "cudagraph_capture_sizes" to
@ -759,14 +740,6 @@ class CompilationConfig:
                f"Invalid backend for piecewise compilation: {self.backend}"
            )
-        if self.use_inductor is not None:
-            logger.warning_once(
-                "The 'use_inductor' flag is deprecated and will be "
-                "removed in the next release (v0.12.0). "
-                "Please use the 'backend' option instead.",
-            )
-            self.backend = "inductor" if self.use_inductor else "eager"
        if self.backend == "":
            self.backend = current_platform.get_compile_backend()