From 28097d5638cc695f4644c411edac8eb05a03b39b Mon Sep 17 00:00:00 2001
From: gausah01 <141038176+gausah01@users.noreply.github.com>
Date: Thu, 4 Dec 2025 05:01:15 +0000
Subject: [PATCH] [Bugfix][CPU] Fix CPU KV cache fallback memory allocation (#29604)

Signed-off-by: Gauri Sahnan
Co-authored-by: Li, Jiang
---
 vllm/platforms/cpu.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py
index 2b2c2f9cdc57..a2518d5fd3dc 100644
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -10,6 +10,7 @@ import sys
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
 
+import psutil
 import regex as re
 import torch
 
@@ -147,11 +148,21 @@ class CpuPlatform(Platform):
         from vllm.utils.mem_constants import GiB_bytes
 
         kv_cache_space = envs.VLLM_CPU_KVCACHE_SPACE
+        node_dir = "/sys/devices/system/node"
         if kv_cache_space is None:
-            kv_cache_space = 4 * GiB_bytes  # type: ignore
+            nodes = (
+                [d for d in os.listdir(node_dir) if d.startswith("node")]
+                if os.path.exists(node_dir)
+                else []
+            )
+            num_numa_nodes = len(nodes) or 1
+            free_cpu_memory = psutil.virtual_memory().total // num_numa_nodes
+            DEFAULT_CPU_MEM_UTILIZATION = 0.5
+            kv_cache_space = int(free_cpu_memory * DEFAULT_CPU_MEM_UTILIZATION)
+            kv_cache_space_gib = kv_cache_space / GiB_bytes
             logger.warning_once(
-                "Environment variable VLLM_CPU_KVCACHE_SPACE (GiB) "
-                "for CPU backend is not set, using 4 by default."
+                "VLLM_CPU_KVCACHE_SPACE not set. Using "
+                f"{kv_cache_space_gib:.2f} GiB for KV cache."
             )
         else:
             kv_cache_space *= GiB_bytes