From a40a8506df78b965f034aeae260e664c63a7f5d5 Mon Sep 17 00:00:00 2001 From: "Ye (Charlotte) Qi" Date: Sat, 26 Jul 2025 07:07:21 -0700 Subject: [PATCH] [Misc] Improve memory profiling debug message (#21429) Signed-off-by: Ye (Charlotte) Qi --- vllm/v1/worker/gpu_worker.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index dcfb038d28c2..d9d1f14f0554 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -246,11 +246,21 @@ class Worker(WorkerBase): available_kv_cache_memory = self.requested_memory \ - profile_result.non_kv_cache_memory + unrequested_memory = self.init_snapshot.free_memory \ + - self.requested_memory logger.debug( - "Initial free memory: %.2f GiB, free memory: %.2f GiB, " - "requested GPU memory: %.2f GiB", - GiB(self.init_snapshot.free_memory), GiB(free_gpu_memory), - GiB(self.requested_memory)) + "Initial free memory: %.2f GiB; " + "Requested memory: %.2f (util), %.2f GiB", + GiB(self.init_snapshot.free_memory), + self.cache_config.gpu_memory_utilization, + GiB(self.requested_memory), + ) + logger.debug( + "Free memory after profiling: %.2f GiB (total), " + "%.2f GiB (within requested)", + GiB(free_gpu_memory), + GiB(free_gpu_memory - unrequested_memory), + ) logger.debug(profile_result) logger.info("Available KV cache memory: %.2f GiB", GiB(available_kv_cache_memory))