From 8e22da1d7fcd43efd8fec18c0c0bf6a8e7cf61a6 Mon Sep 17 00:00:00 2001
From: rasmith
Date: Sat, 22 Nov 2025 05:00:54 -0600
Subject: [PATCH] [CI/Build] Don't add FLASHINFER backend in
 test_cpu_offloading.py (#29229)

Signed-off-by: Randall Smith
Co-authored-by: Randall Smith
---
 tests/v1/kv_offload/test_cpu_offloading.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/v1/kv_offload/test_cpu_offloading.py b/tests/v1/kv_offload/test_cpu_offloading.py
index 3ee41c40859d..406d4c0b4c1f 100644
--- a/tests/v1/kv_offload/test_cpu_offloading.py
+++ b/tests/v1/kv_offload/test_cpu_offloading.py
@@ -12,10 +12,14 @@ from tqdm import tqdm
 from vllm import LLM, SamplingParams, TokensPrompt
 from vllm.config import KVEventsConfig, KVTransferConfig
 from vllm.distributed.kv_events import BlockStored, KVEventBatch
+from vllm.platforms import current_platform
 from vllm.utils.system_utils import set_env_var
 
 CPU_BLOCK_SIZES = [48]
-ATTN_BACKENDS = ["FLASH_ATTN", "FLASHINFER"]
+ATTN_BACKENDS = ["FLASH_ATTN"]
+
+if current_platform.is_cuda():
+    ATTN_BACKENDS.append("FLASHINFER")
 
 
 class MockSubscriber: