mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-21 02:54:27 +08:00
[CI] Only capture a single CUDA graph size in CI by default (#25951)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
63c56cbb25
commit
e8773e620f
@@ -731,6 +731,9 @@ class VllmRunner:
|
||||
init_ctx = (nullcontext() if default_torch_num_threads is None else
|
||||
set_default_torch_num_threads(default_torch_num_threads))
|
||||
|
||||
if not kwargs.get("compilation_config", None):
|
||||
kwargs["compilation_config"] = {"cudagraph_capture_sizes": [8]}
|
||||
|
||||
with init_ctx:
|
||||
self.llm = LLM(
|
||||
model=model_name,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user