mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 02:25:01 +08:00
Add warning on CUDA graph memory usage (#2182)
This commit is contained in:
parent
290e015c6c
commit
21d5daa4ac
@@ -395,6 +395,9 @@ class ModelRunner:
|
|||||||
"unexpected consequences if the model is not static. To "
|
"unexpected consequences if the model is not static. To "
|
||||||
"run the model in eager mode, set 'enforce_eager=True' or "
|
"run the model in eager mode, set 'enforce_eager=True' or "
|
||||||
"use '--enforce-eager' in the CLI.")
|
"use '--enforce-eager' in the CLI.")
|
||||||
|
logger.info("CUDA graphs can take additional 1~3 GiB memory per GPU. "
|
||||||
|
"If you are running out of memory, consider decreasing "
|
||||||
|
"`gpu_memory_utilization` or enforcing eager mode.")
|
||||||
start_time = time.perf_counter()
|
start_time = time.perf_counter()
|
||||||
|
|
||||||
# Prepare dummy inputs. These will be reused for all batch sizes.
|
# Prepare dummy inputs. These will be reused for all batch sizes.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user