From 8cef137689170338fdd573838414fc7f9e3ec84a Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Tue, 23 Dec 2025 11:19:50 +0800 Subject: [PATCH] [Chore] Update more locations to use `attention_config.backend` (#31153) Signed-off-by: DarkLight1337 --- benchmarks/benchmark_batch_invariance.py | 2 +- tests/compile/distributed/test_fusions_e2e.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmark_batch_invariance.py b/benchmarks/benchmark_batch_invariance.py index b5c16c42de467..7473a41e51406 100755 --- a/benchmarks/benchmark_batch_invariance.py +++ b/benchmarks/benchmark_batch_invariance.py @@ -104,7 +104,6 @@ def run_benchmark_with_batch_invariant( random.seed(seed) # Set environment variables - os.environ["VLLM_ATTENTION_BACKEND"] = backend if batch_invariant: os.environ["VLLM_BATCH_INVARIANT"] = "1" else: @@ -140,6 +139,7 @@ def run_benchmark_with_batch_invariant( max_model_len=max_model_len, dtype="bfloat16", tensor_parallel_size=tp_size, + attention_config={"backend": backend}, enable_prefix_caching=False, ) init_time = time.perf_counter() - start_init diff --git a/tests/compile/distributed/test_fusions_e2e.py b/tests/compile/distributed/test_fusions_e2e.py index 28ab2cee71a6a..f8a629ed46cee 100644 --- a/tests/compile/distributed/test_fusions_e2e.py +++ b/tests/compile/distributed/test_fusions_e2e.py @@ -557,7 +557,8 @@ def test_rms_group_quant( # To capture subprocess logs, we need to know whether spawn or fork is used. # Force spawn as it is more general. monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn") - monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend.name) + + model_kwargs["attention_config"] = {"backend": backend.name} compilation_config = CompilationConfig( # Testing properties