diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py index 2b2c2f9cdc57..a2518d5fd3dc 100644 --- a/vllm/platforms/cpu.py +++ b/vllm/platforms/cpu.py @@ -10,6 +10,7 @@ import sys from dataclasses import dataclass from typing import TYPE_CHECKING +import psutil import regex as re import torch @@ -147,11 +148,21 @@ class CpuPlatform(Platform): from vllm.utils.mem_constants import GiB_bytes kv_cache_space = envs.VLLM_CPU_KVCACHE_SPACE + node_dir = "/sys/devices/system/node" if kv_cache_space is None: - kv_cache_space = 4 * GiB_bytes # type: ignore + nodes = ( + [d for d in os.listdir(node_dir) if d.startswith("node")] + if os.path.exists(node_dir) + else [] + ) + num_numa_nodes = len(nodes) or 1 + free_cpu_memory = psutil.virtual_memory().total // num_numa_nodes + DEFAULT_CPU_MEM_UTILIZATION = 0.5 + kv_cache_space = int(free_cpu_memory * DEFAULT_CPU_MEM_UTILIZATION) + kv_cache_space_gib = kv_cache_space / GiB_bytes logger.warning_once( - "Environment variable VLLM_CPU_KVCACHE_SPACE (GiB) " - "for CPU backend is not set, using 4 by default." + "VLLM_CPU_KVCACHE_SPACE not set. Using " + f"{kv_cache_space_gib:.2f} GiB for KV cache." ) else: kv_cache_space *= GiB_bytes