mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 17:05:36 +08:00
[CI] Only capture a single CUDA graph size in CI by default (#25951)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
1405f0c7ba
commit
a332b84578
@ -731,6 +731,9 @@ class VllmRunner:
|
|||||||
init_ctx = (nullcontext() if default_torch_num_threads is None else
|
init_ctx = (nullcontext() if default_torch_num_threads is None else
|
||||||
set_default_torch_num_threads(default_torch_num_threads))
|
set_default_torch_num_threads(default_torch_num_threads))
|
||||||
|
|
||||||
|
if not kwargs.get("compilation_config", None):
|
||||||
|
kwargs["compilation_config"] = {"cudagraph_capture_sizes": [8]}
|
||||||
|
|
||||||
with init_ctx:
|
with init_ctx:
|
||||||
self.llm = LLM(
|
self.llm = LLM(
|
||||||
model=model_name,
|
model=model_name,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user