mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 03:15:20 +08:00
[Bugfix][CPU] Fix CPU KV cache fallback memory allocation (#29604)
Signed-off-by: Gauri Sahnan <gauri.sahnan@arm.com>
Co-authored-by: Li, Jiang <jiang1.li@intel.com>
This commit is contained in:
parent
dd38ba3a26
commit
28097d5638
@@ -10,6 +10,7 @@ import sys
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import psutil
|
||||
import regex as re
|
||||
import torch
|
||||
|
||||
@@ -147,11 +148,21 @@ class CpuPlatform(Platform):
|
||||
from vllm.utils.mem_constants import GiB_bytes
|
||||
|
||||
kv_cache_space = envs.VLLM_CPU_KVCACHE_SPACE
|
||||
node_dir = "/sys/devices/system/node"
|
||||
if kv_cache_space is None:
|
||||
kv_cache_space = 4 * GiB_bytes # type: ignore
|
||||
nodes = (
|
||||
[d for d in os.listdir(node_dir) if d.startswith("node")]
|
||||
if os.path.exists(node_dir)
|
||||
else []
|
||||
)
|
||||
num_numa_nodes = len(nodes) or 1
|
||||
free_cpu_memory = psutil.virtual_memory().total // num_numa_nodes
|
||||
DEFAULT_CPU_MEM_UTILIZATION = 0.5
|
||||
kv_cache_space = int(free_cpu_memory * DEFAULT_CPU_MEM_UTILIZATION)
|
||||
kv_cache_space_gib = kv_cache_space / GiB_bytes
|
||||
logger.warning_once(
|
||||
"Environment variable VLLM_CPU_KVCACHE_SPACE (GiB) "
|
||||
"for CPU backend is not set, using 4 by default."
|
||||
"VLLM_CPU_KVCACHE_SPACE not set. Using "
|
||||
f"{kv_cache_space_gib:.2f} GiB for KV cache."
|
||||
)
|
||||
else:
|
||||
kv_cache_space *= GiB_bytes
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user