[XPU]Fix crash due to removed VLLM_USE_V1 attribute (#28520)
Signed-off-by: chaojun-zhang <chaojun.zhang@intel.com>
Parent: d3ade61e42
Commit: a4730c1b4f
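For context on the crash itself: once VLLM_USE_V1 is deleted from vllm.envs, any leftover envs.VLLM_USE_V1 lookup typically fails with an AttributeError the first time the XPU platform code runs. The sketch below reproduces that failure mode with a stand-in settings object; envs_stub and select_backend_old are illustrative names, not vLLM code.

import types

# Stand-in for a settings module that used to expose VLLM_USE_V1 and no
# longer does. The real vllm.envs module is more elaborate; this is only a
# minimal object for showing the failure mode.
envs_stub = types.SimpleNamespace(VLLM_DEVICE="xpu")

def select_backend_old() -> str:
    # Pre-fix shape of the code: reading the removed attribute crashes
    # before any backend selection happens.
    use_v1 = envs_stub.VLLM_USE_V1  # raises AttributeError here
    if not use_v1:
        raise ValueError("XPU backend only supports V1.")
    return "TRITON_ATTN"

try:
    select_backend_old()
except AttributeError as exc:
    print(f"crash reproduced: {exc}")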
@@ -65,7 +65,6 @@ class XPUPlatform(Platform):
 
         if use_sparse:
             raise NotImplementedError("Sparse Attention is not supported on XPU.")
-        use_v1 = envs.VLLM_USE_V1
         if not use_v1:
             raise ValueError("XPU backend only supports V1.")
         if selected_backend == AttentionBackendEnum.TRITON_ATTN:
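The hunk above drops the re-read of envs.VLLM_USE_V1. A more defensive alternative, sometimes useful while an environment flag is being phased out, is to read the attribute through getattr with a default. This is only a general-purpose sketch and not what the commit does; read_use_v1 and the True fallback are hypothetical.

import types

def read_use_v1(envs_module) -> bool:
    # Defensive read: fall back to True (V1 is the only supported engine)
    # when the attribute has been removed from the settings module.
    return getattr(envs_module, "VLLM_USE_V1", True)

print(read_use_v1(types.SimpleNamespace()))                    # True
print(read_use_v1(types.SimpleNamespace(VLLM_USE_V1=False)))   # False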
@@ -115,7 +114,9 @@ class XPUPlatform(Platform):
     @classmethod
     def get_vit_attn_backend(
         cls, head_size: int, dtype: torch.dtype
-    ) -> AttentionBackendEnum:
+    ) -> "AttentionBackendEnum":
+        from vllm.attention.backends.registry import AttentionBackendEnum
+
         return AttentionBackendEnum.FLASH_ATTN
 
     @classmethod
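The second hunk quotes the return annotation and moves the AttentionBackendEnum import into the method body, so the registry module only has to be importable when get_vit_attn_backend is actually called rather than when the platform module loads (a common way to sidestep import-order or circular-import problems). The sketch below shows the same pattern with a standard-library type; Calculator and exact_ratio are placeholder names, not vLLM code.

class Calculator:
    @classmethod
    def exact_ratio(cls, numerator: int, denominator: int) -> "Fraction":
        # The quoted annotation is not evaluated at definition time, so
        # Fraction does not need to be in scope here; the import is deferred
        # until the method actually runs.
        from fractions import Fraction

        return Fraction(numerator, denominator)

print(Calculator.exact_ratio(3, 6))  # 1/2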