mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-20 04:34:29 +08:00
[Chore] Update more locations to use attention_config.backend (#31153)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
a37328fc5c
commit
8cef137689
@@ -104,7 +104,6 @@ def run_benchmark_with_batch_invariant(
|
||||
random.seed(seed)
|
||||
|
||||
# Set environment variables
|
||||
os.environ["VLLM_ATTENTION_BACKEND"] = backend
|
||||
if batch_invariant:
|
||||
os.environ["VLLM_BATCH_INVARIANT"] = "1"
|
||||
else:
|
||||
@@ -140,6 +139,7 @@ def run_benchmark_with_batch_invariant(
|
||||
max_model_len=max_model_len,
|
||||
dtype="bfloat16",
|
||||
tensor_parallel_size=tp_size,
|
||||
attention_config={"backend": backend},
|
||||
enable_prefix_caching=False,
|
||||
)
|
||||
init_time = time.perf_counter() - start_init
|
||||
|
||||
@@ -557,7 +557,8 @@ def test_rms_group_quant(
|
||||
# To capture subprocess logs, we need to know whether spawn or fork is used.
|
||||
# Force spawn as it is more general.
|
||||
monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
|
||||
monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend.name)
|
||||
|
||||
model_kwargs["attention_config"] = {"backend": backend.name}
|
||||
|
||||
compilation_config = CompilationConfig(
|
||||
# Testing properties
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user