From 1c63a16b653d1f3f4260e862f01eaffed283c4c8 Mon Sep 17 00:00:00 2001
From: Micah Williamson <micah.williamson@amd.com>
Date: Tue, 9 Sep 2025 09:38:10 -0500
Subject: [PATCH] [Core] Run garbage collector after CUDA graph capture to fix
 throughput regression (#24128)

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
Co-authored-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
---
 vllm/v1/worker/gpu_model_runner.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 549c5dd2bbb2..897c3a621320 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2885,6 +2885,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             finally:
                 if should_freeze:
                     gc.unfreeze()
+                    gc.collect()
 
         # Trigger CUDA graph capture for specific shapes.
         # Capture the large shapes first so that the smaller shapes