Mirror of https://git.datalinker.icu/vllm-project/vllm.git, synced 2025-12-10 05:15:42 +08:00
[CI/Build] Don't add FLASHINFER backend in test_cpu_offloading.py (#29229)

Signed-off-by: Randall Smith <ransmith@amd.com>
Co-authored-by: Randall Smith <ransmith@amd.com>
parent a4fdf2405c
commit 8e22da1d7f
@@ -12,10 +12,14 @@ from tqdm import tqdm
 from vllm import LLM, SamplingParams, TokensPrompt
 from vllm.config import KVEventsConfig, KVTransferConfig
 from vllm.distributed.kv_events import BlockStored, KVEventBatch
+from vllm.platforms import current_platform
 from vllm.utils.system_utils import set_env_var
 
 CPU_BLOCK_SIZES = [48]
-ATTN_BACKENDS = ["FLASH_ATTN", "FLASHINFER"]
+ATTN_BACKENDS = ["FLASH_ATTN"]
+
+if current_platform.is_cuda():
+    ATTN_BACKENDS.append("FLASHINFER")
 
 
 class MockSubscriber:
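In effect, FLASHINFER is dropped from the default backend list and appended only when the platform reports CUDA, so CPU-only and ROCm CI runs never try to exercise the FlashInfer backend. Below is a minimal sketch of how such a conditionally built list is typically consumed by a parametrized test; the pytest parametrization, the VLLM_ATTENTION_BACKEND variable, and the use of set_env_var as a context manager are assumptions here, since the actual test body lies outside this hunk.

import pytest

from vllm.platforms import current_platform
from vllm.utils.system_utils import set_env_var

ATTN_BACKENDS = ["FLASH_ATTN"]

# FLASHINFER requires CUDA, so only add it when a CUDA device is available.
if current_platform.is_cuda():
    ATTN_BACKENDS.append("FLASHINFER")


@pytest.mark.parametrize("attn_backend", ATTN_BACKENDS)
def test_cpu_offloading(attn_backend: str) -> None:
    # Assumed usage: select the attention backend for this run via the
    # VLLM_ATTENTION_BACKEND environment variable (hypothetical test body).
    with set_env_var("VLLM_ATTENTION_BACKEND", attn_backend):
        ...  # construct the LLM and exercise CPU offloading here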