mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 02:55:40 +08:00
[Core] Run garbage collector after CUDA graph capture to fix throughput regression (#24128)
Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
Co-authored-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
This commit is contained in:
parent
922d3b401b
commit
1c63a16b65
@ -2885,6 +2885,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
|||||||
finally:
|
finally:
|
||||||
if should_freeze:
|
if should_freeze:
|
||||||
gc.unfreeze()
|
gc.unfreeze()
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
# Trigger CUDA graph capture for specific shapes.
|
# Trigger CUDA graph capture for specific shapes.
|
||||||
# Capture the large shapes first so that the smaller shapes
|
# Capture the large shapes first so that the smaller shapes
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user