From 1c63a16b653d1f3f4260e862f01eaffed283c4c8 Mon Sep 17 00:00:00 2001 From: Micah Williamson Date: Tue, 9 Sep 2025 09:38:10 -0500 Subject: [PATCH] [Core] Run garbage collector after CUDA graph capture to fix throughput regression (#24128) Signed-off-by: Gregory Shtrasberg Co-authored-by: Gregory Shtrasberg --- vllm/v1/worker/gpu_model_runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 549c5dd2bbb2..897c3a621320 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2885,6 +2885,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): finally: if should_freeze: gc.unfreeze() + gc.collect() # Trigger CUDA graph capture for specific shapes. # Capture the large shapes first so that the smaller shapes