mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-19 22:25:01 +08:00
[MISC] Fix misleading batch_size_capture_list when cuda_graph_sizes < 4 (#25829)
Signed-off-by: billishyahao <bill.he@amd.com> Co-authored-by: Luka Govedic <ProExpertProg@users.noreply.github.com>
This commit is contained in:
parent
a332b84578
commit
2518230d3e
@@ -580,9 +580,12 @@ class VllmConfig:
|
|||||||
not self.model_config.enforce_eager:
|
not self.model_config.enforce_eager:
|
||||||
cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes
|
cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes
|
||||||
if len(cuda_graph_sizes) == 1:
|
if len(cuda_graph_sizes) == 1:
|
||||||
batch_size_capture_list = [1, 2, 4] + [
|
max_graph_size = cuda_graph_sizes[0]
|
||||||
i for i in range(8, cuda_graph_sizes[0] + 1, 8)
|
assert max_graph_size >= 1, "Maximum cudagraph size should be" \
|
||||||
]
|
" greater than or equal to 1."
|
||||||
|
batch_size_capture_list = [
|
||||||
|
i for i in [1, 2, 4] if i <= max_graph_size
|
||||||
|
] + list(range(8, max_graph_size + 1, 8))
|
||||||
elif len(cuda_graph_sizes) > 1:
|
elif len(cuda_graph_sizes) > 1:
|
||||||
batch_size_capture_list = sorted(cuda_graph_sizes)
|
batch_size_capture_list = sorted(cuda_graph_sizes)
|
||||||
else:
|
else:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user