mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 18:16:46 +08:00
[Bugfix][Misc]: fix graph capture for decoder (#9549)
This commit is contained in:
parent
f6b97293aa
commit
8ca8954841
@ -828,7 +828,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
|
|||||||
|
|
||||||
cuda_graph_pad_size = self._get_cuda_graph_pad_size(
|
cuda_graph_pad_size = self._get_cuda_graph_pad_size(
|
||||||
num_seqs=len(seq_lens),
|
num_seqs=len(seq_lens),
|
||||||
max_decode_seq_len=max_encoder_seq_len,
|
max_decode_seq_len=max_decode_seq_len,
|
||||||
max_encoder_seq_len=max_encoder_seq_len)
|
max_encoder_seq_len=max_encoder_seq_len)
|
||||||
|
|
||||||
batch_size = len(input_tokens)
|
batch_size = len(input_tokens)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user