mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 15:36:29 +08:00
[CI/Build] Don't add FLASHINFER backend in test_cpu_offloading.py (#29229)
Signed-off-by: Randall Smith <ransmith@amd.com> Co-authored-by: Randall Smith <ransmith@amd.com>
This commit is contained in:
parent
a4fdf2405c
commit
8e22da1d7f
@ -12,10 +12,14 @@ from tqdm import tqdm
|
||||
from vllm import LLM, SamplingParams, TokensPrompt
|
||||
from vllm.config import KVEventsConfig, KVTransferConfig
|
||||
from vllm.distributed.kv_events import BlockStored, KVEventBatch
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils.system_utils import set_env_var
|
||||
|
||||
CPU_BLOCK_SIZES = [48]
|
||||
ATTN_BACKENDS = ["FLASH_ATTN", "FLASHINFER"]
|
||||
ATTN_BACKENDS = ["FLASH_ATTN"]
|
||||
|
||||
if current_platform.is_cuda():
|
||||
ATTN_BACKENDS.append("FLASHINFER")
|
||||
|
||||
|
||||
class MockSubscriber:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user