mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 03:05:02 +08:00
[XPU] Fix circular import error. (#24927)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
parent
0af3ce1355
commit
5206ab20ba
@ -9,7 +9,6 @@ import torch
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils import DEFAULT_MAX_NUM_BATCHED_TOKENS
|
||||
from vllm.v1.attention.backends.utils import set_kv_cache_layout
|
||||
|
||||
from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
|
||||
|
||||
@ -164,11 +163,16 @@ class XPUPlatform(Platform):
|
||||
vllm_config.scheduler_config.max_num_batched_tokens = max(
|
||||
vllm_config.scheduler_config.max_model_len,
|
||||
DEFAULT_MAX_NUM_BATCHED_TOKENS)
|
||||
from vllm.v1.attention.backends.utils import set_kv_cache_layout
|
||||
|
||||
set_kv_cache_layout("NHD")
|
||||
logger.info("Setting VLLM_KV_CACHE_LAYOUT to 'NHD' for XPU; "
|
||||
"only NHD layout is supported by XPU attention kernels.")
|
||||
|
||||
@classmethod
|
||||
def support_hybrid_kv_cache(cls) -> bool:
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def is_pin_memory_available(cls):
|
||||
return True
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user