Add warning on CUDA graph memory usage (#2182)
parent 290e015c6c
commit 21d5daa4ac
@@ -395,6 +395,9 @@ class ModelRunner:
                     "unexpected consequences if the model is not static. To "
                     "run the model in eager mode, set 'enforce_eager=True' or "
                     "use '--enforce-eager' in the CLI.")
+        logger.info("CUDA graphs can take additional 1~3 GiB memory per GPU. "
+                    "If you are running out of memory, consider decreasing "
+                    "`gpu_memory_utilization` or enforcing eager mode.")
         start_time = time.perf_counter()
 
         # Prepare dummy inputs. These will be reused for all batch sizes.
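For context, the two remedies named in the new warning are engine arguments on vLLM's LLM class (and equivalent server CLI flags). A minimal sketch of how a user might apply them; the model name is illustrative:

    from vllm import LLM, SamplingParams

    # Option 1: leave headroom for CUDA graph capture by lowering the fraction
    # of GPU memory vLLM is allowed to reserve (default is 0.9).
    llm = LLM(model="facebook/opt-125m", gpu_memory_utilization=0.80)

    # Option 2: skip CUDA graph capture entirely by running in eager mode.
    # llm = LLM(model="facebook/opt-125m", enforce_eager=True)

    outputs = llm.generate(["Hello, my name is"],
                           SamplingParams(max_tokens=16))
    print(outputs[0].outputs[0].text)

When launching a server instead, the same knobs are exposed as --gpu-memory-utilization and --enforce-eager on the command line.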