From 8cef137689170338fdd573838414fc7f9e3ec84a Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Tue, 23 Dec 2025 11:19:50 +0800
Subject: [PATCH] [Chore] Update more locations to use
 `attention_config.backend` (#31153)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 benchmarks/benchmark_batch_invariance.py      | 2 +-
 tests/compile/distributed/test_fusions_e2e.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/benchmarks/benchmark_batch_invariance.py b/benchmarks/benchmark_batch_invariance.py
index b5c16c42de467..7473a41e51406 100755
--- a/benchmarks/benchmark_batch_invariance.py
+++ b/benchmarks/benchmark_batch_invariance.py
@@ -104,7 +104,6 @@ def run_benchmark_with_batch_invariant(
     random.seed(seed)
 
     # Set environment variables
-    os.environ["VLLM_ATTENTION_BACKEND"] = backend
     if batch_invariant:
         os.environ["VLLM_BATCH_INVARIANT"] = "1"
     else:
@@ -140,6 +139,7 @@ def run_benchmark_with_batch_invariant(
             max_model_len=max_model_len,
             dtype="bfloat16",
             tensor_parallel_size=tp_size,
+            attention_config={"backend": backend},
             enable_prefix_caching=False,
         )
         init_time = time.perf_counter() - start_init
diff --git a/tests/compile/distributed/test_fusions_e2e.py b/tests/compile/distributed/test_fusions_e2e.py
index 28ab2cee71a6a..f8a629ed46cee 100644
--- a/tests/compile/distributed/test_fusions_e2e.py
+++ b/tests/compile/distributed/test_fusions_e2e.py
@@ -557,7 +557,8 @@ def test_rms_group_quant(
     # To capture subprocess logs, we need to know whether spawn or fork is used.
     # Force spawn as it is more general.
     monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
-    monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend.name)
+
+    model_kwargs["attention_config"] = {"backend": backend.name}
 
     compilation_config = CompilationConfig(
         # Testing properties