From 2386803f2a2e3df1f29ea05212eaf68590b85805 Mon Sep 17 00:00:00 2001 From: "Li, Jiang" Date: Sat, 5 Apr 2025 00:39:05 +0800 Subject: [PATCH] [CPU] Change default block_size for CPU backend (#16002) Signed-off-by: jiang1.li --- vllm/platforms/cpu.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py index 619219023f4da..67466bdb98075 100644 --- a/vllm/platforms/cpu.py +++ b/vllm/platforms/cpu.py @@ -2,6 +2,7 @@ import os import sys +from importlib.util import find_spec from typing import TYPE_CHECKING, Optional import psutil @@ -68,8 +69,15 @@ class CpuPlatform(Platform): cache_config = vllm_config.cache_config + ipex_avaliable = find_spec("intel_extension_for_pytorch") is not None + if cache_config and cache_config.block_size is None: - cache_config.block_size = 16 + cache_config.block_size = 128 if ipex_avaliable else 16 + + if not ipex_avaliable and cache_config.block_size != 16: + raise RuntimeError( + f"--block-size={cache_config.block_size} requires" + " intel_extension_for_pytorch") scheduler_config = vllm_config.scheduler_config if ((scheduler_config.chunked_prefill_enabled