[CI/Build] Don't add FLASHINFER backend in test_cpu_offloading.py (#29229)

Signed-off-by: Randall Smith <ransmith@amd.com>
Co-authored-by: Randall Smith <ransmith@amd.com>
rasmith committed 2025-11-22 05:00:54 -06:00 (committed by GitHub)
parent a4fdf2405c
commit 8e22da1d7f


@@ -12,10 +12,14 @@ from tqdm import tqdm
 from vllm import LLM, SamplingParams, TokensPrompt
 from vllm.config import KVEventsConfig, KVTransferConfig
 from vllm.distributed.kv_events import BlockStored, KVEventBatch
+from vllm.platforms import current_platform
 from vllm.utils.system_utils import set_env_var
 
 CPU_BLOCK_SIZES = [48]
-ATTN_BACKENDS = ["FLASH_ATTN", "FLASHINFER"]
+ATTN_BACKENDS = ["FLASH_ATTN"]
+
+if current_platform.is_cuda():
+    ATTN_BACKENDS.append("FLASHINFER")
 
 
 class MockSubscriber:
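
For context, the sketch below shows how a parametrized backend list like this is typically consumed in a vLLM test: FLASHINFER requires CUDA, so guarding the append keeps the parametrization valid on non-CUDA platforms (e.g. ROCm CI) instead of failing there. The test name, its body, and the assumption that set_env_var behaves as a context manager are illustrative, not the actual contents of test_cpu_offloading.py.

import pytest

from vllm.platforms import current_platform
from vllm.utils.system_utils import set_env_var  # assumed to be a context manager

CPU_BLOCK_SIZES = [48]
ATTN_BACKENDS = ["FLASH_ATTN"]

# FLASHINFER is CUDA-only, so it is appended to the parametrization
# only when running on a CUDA platform.
if current_platform.is_cuda():
    ATTN_BACKENDS.append("FLASHINFER")


@pytest.mark.parametrize("attn_backend", ATTN_BACKENDS)
@pytest.mark.parametrize("cpu_block_size", CPU_BLOCK_SIZES)
def test_cpu_offloading(attn_backend: str, cpu_block_size: int) -> None:
    # Hypothetical body: select the attention backend through the
    # VLLM_ATTENTION_BACKEND environment variable for this test only.
    with set_env_var("VLLM_ATTENTION_BACKEND", attn_backend):
        ...  # build an LLM with CPU offloading and run the assertions

Parametrizing over the list rather than hard-coding backend names means a platform where a backend is unavailable simply generates fewer test cases, with no skips or import-time failures.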