[Chore] Update more locations to use attention_config.backend (#31153)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-03-16 08:27:07 +08:00 · 2025-12-23 11:19:50 +08:00 · 2025-12-23 11:19:50 +08:00 · 8cef137689
commit 8cef137689
parent a37328fc5c
2 changed files with 3 additions and 2 deletions
--- a/benchmarks/benchmark_batch_invariance.py
+++ b/benchmarks/benchmark_batch_invariance.py
@@ -104,7 +104,6 @@ def run_benchmark_with_batch_invariant(
    random.seed(seed)

    # Set environment variables
-    os.environ["VLLM_ATTENTION_BACKEND"] = backend
    if batch_invariant:
        os.environ["VLLM_BATCH_INVARIANT"] = "1"
    else:
@@ -140,6 +139,7 @@ def run_benchmark_with_batch_invariant(
            max_model_len=max_model_len,
            dtype="bfloat16",
            tensor_parallel_size=tp_size,
+            attention_config={"backend": backend},
            enable_prefix_caching=False,
        )
        init_time = time.perf_counter() - start_init
--- a/tests/compile/distributed/test_fusions_e2e.py
+++ b/tests/compile/distributed/test_fusions_e2e.py
@@ -557,7 +557,8 @@ def test_rms_group_quant(
    # To capture subprocess logs, we need to know whether spawn or fork is used.
    # Force spawn as it is more general.
    monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
-    monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend.name)
+
+    model_kwargs["attention_config"] = {"backend": backend.name}

    compilation_config = CompilationConfig(
        # Testing properties