mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-23 07:04:27 +08:00
[CI] Add comment about the single cudagraph capture size that is used (#26252)
This commit is contained in:
parent
f509a20846
commit
d3c84297c3
@@ -740,6 +740,10 @@ class VllmRunner:
|
||||
)
|
||||
|
||||
if not kwargs.get("compilation_config", None):
|
||||
# Note(@tdoublep): This is set to 4 because some tests (e.g., hybrid
|
||||
# model tests) may set max_num_seqs=4. If min cudagraph_capture_size is
|
||||
# set to larger than max_num_seqs, then it will lead to *no* graphs
|
||||
# being captured which can trigger edge cases that we don't handle yet.
|
||||
kwargs["compilation_config"] = {"cudagraph_capture_sizes": [4]}
|
||||
|
||||
with init_ctx:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user