Fix assertion error: `num_tokens_across_dp is None or num_tokens_across_dp[dp_rank] == batchsize`

Signed-off-by: yewentao256 <zhyanwentao@126.com>
2026-06-03 19:55:45 +08:00 · 2025-08-11 13:31:27 -07:00 · 2025-08-11 13:31:27 -07:00 · dd2a94fd9d
commit dd2a94fd9d
parent e526b1c091
1 changed files with 11 additions and 3 deletions
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2031,10 +2031,17 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
        # Run the model.
        # Use persistent buffers for CUDA graphs.
        # when DBO is enabled, `num_tokens_after_padding`
        # represents the per-ubatch DP token count.
        dp_tokens_for_forward = num_tokens_after_padding
        if ubatch_slices is not None and num_tokens_after_padding is not None:
            dp_tokens_for_forward = num_tokens_after_padding * len(
                ubatch_slices)
        with set_forward_context(attn_metadata,
                                 vllm_config=self.vllm_config,
                                 num_tokens=num_input_tokens or 1,
-                                 num_tokens_across_dp=num_tokens_after_padding,
+                                 num_tokens_across_dp=dp_tokens_for_forward,
                                 skip_cuda_graphs=skip_cuda_graphs):
            self.maybe_setup_kv_connector(scheduler_output)
        model_output = self._run_model(
@@ -2693,7 +2700,8 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
            assert num_tokens % 2 == 0
            num_tokens_per_ubatch = num_tokens // 2
            dp_size = self.vllm_config.parallel_config.data_parallel_size
-            num_tokens_across_dp = torch.tensor([num_tokens_per_ubatch] * dp_size,
+            num_tokens_across_dp = torch.tensor([num_tokens_per_ubatch] *
                                                dp_size,
                                                device="cpu",
                                                dtype=torch.int32)
            ubatch_slices = [(slice(0,