mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-21 04:05:01 +08:00
[Log] Optimize kv cache memory log from Bytes to GiB (#25204)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
2357480b1a
commit
846197f505
@@ -383,11 +383,13 @@ class Worker(WorkerBase):
|
|||||||
f"for non-torch memory, and {GiB(cuda_graph_memory_bytes)} "
|
f"for non-torch memory, and {GiB(cuda_graph_memory_bytes)} "
|
||||||
f"GiB for CUDAGraph memory. Replace gpu_memory_utilization "
|
f"GiB for CUDAGraph memory. Replace gpu_memory_utilization "
|
||||||
f"config with `--kv-cache-memory="
|
f"config with `--kv-cache-memory="
|
||||||
f"{kv_cache_memory_bytes_to_requested_limit}` to fit into "
|
f"{kv_cache_memory_bytes_to_requested_limit}` "
|
||||||
f"requested memory, or `--kv-cache-memory="
|
f"({GiB(kv_cache_memory_bytes_to_requested_limit)} GiB) to fit "
|
||||||
f"{kv_cache_memory_bytes_to_gpu_limit}` to fully "
|
f"into requested memory, or `--kv-cache-memory="
|
||||||
|
f"{kv_cache_memory_bytes_to_gpu_limit}` "
|
||||||
|
f"({GiB(kv_cache_memory_bytes_to_gpu_limit)} GiB) to fully "
|
||||||
f"utilize gpu memory. Current kv cache memory in use is "
|
f"utilize gpu memory. Current kv cache memory in use is "
|
||||||
f"{int(self.available_kv_cache_memory_bytes)} bytes.")
|
f"{GiB(self.available_kv_cache_memory_bytes)} GiB.")
|
||||||
|
|
||||||
logger.debug(msg)
|
logger.debug(msg)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user