[Bugfix][CPU] Fix CPU KV cache fallback memory allocation (#29604)

Signed-off-by: Gauri Sahnan <gauri.sahnan@arm.com>
Co-authored-by: Li, Jiang <jiang1.li@intel.com>
This commit is contained in:
gausah01 2025-12-04 05:01:15 +00:00 committed by GitHub
parent dd38ba3a26
commit 28097d5638
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -10,6 +10,7 @@ import sys
from dataclasses import dataclass from dataclasses import dataclass
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
import psutil
import regex as re import regex as re
import torch import torch
@@ -147,11 +148,21 @@ class CpuPlatform(Platform):
from vllm.utils.mem_constants import GiB_bytes from vllm.utils.mem_constants import GiB_bytes
kv_cache_space = envs.VLLM_CPU_KVCACHE_SPACE kv_cache_space = envs.VLLM_CPU_KVCACHE_SPACE
node_dir = "/sys/devices/system/node"
if kv_cache_space is None: if kv_cache_space is None:
kv_cache_space = 4 * GiB_bytes # type: ignore nodes = (
[d for d in os.listdir(node_dir) if d.startswith("node")]
if os.path.exists(node_dir)
else []
)
num_numa_nodes = len(nodes) or 1
free_cpu_memory = psutil.virtual_memory().total // num_numa_nodes
DEFAULT_CPU_MEM_UTILIZATION = 0.5
kv_cache_space = int(free_cpu_memory * DEFAULT_CPU_MEM_UTILIZATION)
kv_cache_space_gib = kv_cache_space / GiB_bytes
logger.warning_once( logger.warning_once(
"Environment variable VLLM_CPU_KVCACHE_SPACE (GiB) " "VLLM_CPU_KVCACHE_SPACE not set. Using "
"for CPU backend is not set, using 4 by default." f"{kv_cache_space_gib:.2f} GiB for KV cache."
) )
else: else:
kv_cache_space *= GiB_bytes kv_cache_space *= GiB_bytes