[Bugfix][Misc]: fix graph capture for decoder (#9549)

2026-01-28 10:07:14 +08:00 · 2024-10-22 01:33:30 +08:00 · 2024-10-22 01:33:30 +08:00 · 8ca8954841
commit 8ca8954841
parent f6b97293aa
1 changed file with 1 addition and 1 deletion
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -828,7 +828,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):

        cuda_graph_pad_size = self._get_cuda_graph_pad_size(
            num_seqs=len(seq_lens),
-            max_decode_seq_len=max_encoder_seq_len,
+            max_decode_seq_len=max_decode_seq_len,
            max_encoder_seq_len=max_encoder_seq_len)

        batch_size = len(input_tokens)