From 5206ab20ba7477e5457c2e64469590d548fa15e6 Mon Sep 17 00:00:00 2001
From: Kunshang Ji
Date: Tue, 16 Sep 2025 11:35:36 +0800
Subject: [PATCH] [XPU] Fix circular import error. (#24927)

Signed-off-by: Kunshang Ji
---
 vllm/platforms/xpu.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index 792115b33ea8..67ef058df10f 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -9,7 +9,6 @@
 import torch
 import vllm.envs as envs
 from vllm.logger import init_logger
 from vllm.utils import DEFAULT_MAX_NUM_BATCHED_TOKENS
-from vllm.v1.attention.backends.utils import set_kv_cache_layout
 
 from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
@@ -164,11 +163,16 @@ class XPUPlatform(Platform):
             vllm_config.scheduler_config.max_num_batched_tokens = max(
                 vllm_config.scheduler_config.max_model_len,
                 DEFAULT_MAX_NUM_BATCHED_TOKENS)
 
+        from vllm.v1.attention.backends.utils import set_kv_cache_layout
         set_kv_cache_layout("NHD")
         logger.info("Setting VLLM_KV_CACHE_LAYOUT to 'NHD' for XPU; "
                     "only NHD layout is supported by XPU attention kernels.")
 
+    @classmethod
+    def support_hybrid_kv_cache(cls) -> bool:
+        return True
+
     @classmethod
     def is_pin_memory_available(cls):
         return True
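
The fix defers the set_kv_cache_layout import into the method body, so
vllm.v1.attention.backends.utils is only imported at call time, after
vllm/platforms/xpu.py has finished initializing. Below is a minimal,
self-contained sketch of this deferred-import pattern; the module names
cyc_a and cyc_b are hypothetical stand-ins for the two vLLM modules (in
vLLM itself the cycle runs transitively through the attention backend
utils, not through a direct two-module loop).

    import sys
    import tempfile
    import textwrap
    from pathlib import Path

    # Create two throwaway modules on disk so the cycle is real.
    pkg_dir = Path(tempfile.mkdtemp())
    sys.path.insert(0, str(pkg_dir))

    # cyc_b pulls a name back out of cyc_a at module level, creating the cycle.
    (pkg_dir / "cyc_b.py").write_text(textwrap.dedent("""\
        from cyc_a import check_and_update_config

        def set_kv_cache_layout(layout):
            return layout
    """))

    # A module-level "from cyc_b import set_kv_cache_layout" here would raise
    # ImportError: cyc_b would try to read check_and_update_config from a
    # partially initialized cyc_a. Deferring the import to call time, as the
    # patch does, sidesteps the cycle.
    (pkg_dir / "cyc_a.py").write_text(textwrap.dedent("""\
        def check_and_update_config():
            from cyc_b import set_kv_cache_layout  # deferred import
            return set_kv_cache_layout("NHD")
    """))

    import cyc_a

    print(cyc_a.check_and_update_config())  # -> NHD

With the import at module level in cyc_a, loading cyc_a starts loading
cyc_b, which reads check_and_update_config from a cyc_a that has not yet
defined it, and the import fails. With the function-local import, both
modules are fully initialized by the time the name is resolved.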