[Chore] Update more locations to use attention_config.backend (#31153)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung 2025-12-23 11:19:50 +08:00 committed by GitHub
parent a37328fc5c
commit 8cef137689
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 3 additions and 2 deletions

View File

@ -104,7 +104,6 @@ def run_benchmark_with_batch_invariant(
random.seed(seed)
# Set environment variables
os.environ["VLLM_ATTENTION_BACKEND"] = backend
if batch_invariant:
os.environ["VLLM_BATCH_INVARIANT"] = "1"
else:
@ -140,6 +139,7 @@ def run_benchmark_with_batch_invariant(
max_model_len=max_model_len,
dtype="bfloat16",
tensor_parallel_size=tp_size,
attention_config={"backend": backend},
enable_prefix_caching=False,
)
init_time = time.perf_counter() - start_init

View File

@ -557,7 +557,8 @@ def test_rms_group_quant(
# To capture subprocess logs, we need to know whether spawn or fork is used.
# Force spawn as it is more general.
monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend.name)
model_kwargs["attention_config"] = {"backend": backend.name}
compilation_config = CompilationConfig(
# Testing properties