From a4730c1b4fa297848aeb8b81a36c5158b3845e9c Mon Sep 17 00:00:00 2001
From: Chaojun Zhang
Date: Wed, 12 Nov 2025 18:20:55 +0800
Subject: [PATCH] [XPU]Fix crash due to removed VLLM_USE_V1 attribute (#28520)

Signed-off-by: chaojun-zhang
---
 vllm/platforms/xpu.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index 684d6d9a6b57..359eafc66445 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -65,7 +65,6 @@ class XPUPlatform(Platform):
         if use_sparse:
             raise NotImplementedError("Sparse Attention is not supported on XPU.")
 
-        use_v1 = envs.VLLM_USE_V1
         if not use_v1:
             raise ValueError("XPU backend only supports V1.")
         if selected_backend == AttentionBackendEnum.TRITON_ATTN:
@@ -115,7 +114,9 @@ class XPUPlatform(Platform):
     @classmethod
     def get_vit_attn_backend(
         cls, head_size: int, dtype: torch.dtype
-    ) -> AttentionBackendEnum:
+    ) -> "AttentionBackendEnum":
+        from vllm.attention.backends.registry import AttentionBackendEnum
+
        return AttentionBackendEnum.FLASH_ATTN
 
     @classmethod
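
Note (not part of the patch): a minimal, self-contained sketch of the pattern used in
the second hunk, namely a string forward reference in the return annotation combined
with a function-local import so the dependency is only resolved when the method is
called. The class and enum names below are illustrative stand-ins; only the
vllm.attention.backends.registry import path comes from the patch itself.

    from enum import Enum


    class AttentionBackendEnum(Enum):
        # Stand-in for vllm.attention.backends.registry.AttentionBackendEnum
        FLASH_ATTN = "FLASH_ATTN"


    class XPUPlatformSketch:
        @classmethod
        def get_vit_attn_backend(cls, head_size: int, dtype) -> "AttentionBackendEnum":
            # The quoted annotation is a forward reference, so the name does not
            # need to be importable when the class body is executed. In the real
            # patch the deferred import is
            #     from vllm.attention.backends.registry import AttentionBackendEnum
            # and it runs only when this method is actually invoked.
            return AttentionBackendEnum.FLASH_ATTN


    backend = XPUPlatformSketch.get_vit_attn_backend(head_size=64, dtype=None)
    print(backend)  # AttentionBackendEnum.FLASH_ATTN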