mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-21 01:05:35 +08:00
Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com>
This commit is contained in:
parent
5fe643fc26
commit
8226dd56bf
@ -209,7 +209,8 @@ class GDNAttentionMetadataBuilder(
|
|||||||
|
|
||||||
# prepare tensors for cudagraph
|
# prepare tensors for cudagraph
|
||||||
if (self.use_full_cuda_graph and num_prefills == 0 and num_decodes == 0
|
if (self.use_full_cuda_graph and num_prefills == 0 and num_decodes == 0
|
||||||
and num_spec_decodes <= self.decode_cudagraph_max_bs):
|
and num_spec_decodes <= self.decode_cudagraph_max_bs
|
||||||
|
and m.num_actual_tokens <= self.decode_cudagraph_max_bs):
|
||||||
num_total_tokens = self.vllm_config.pad_for_cudagraph(
|
num_total_tokens = self.vllm_config.pad_for_cudagraph(
|
||||||
m.num_actual_tokens)
|
m.num_actual_tokens)
|
||||||
batch_size = num_total_tokens // (self.num_spec + 1)
|
batch_size = num_total_tokens // (self.num_spec + 1)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user