From 0d37450eb7e38ac82df35d4e0f21d4254435049d Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Thu, 9 Oct 2025 10:13:56 -0700 Subject: [PATCH] [BUGFIX] Add cu_tokens_across_sp to DPMetadata (#26457) Signed-off-by: Sage Moore --- vllm/forward_context.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/vllm/forward_context.py b/vllm/forward_context.py index a6a1e36bfe953..09da1398b0309 100644 --- a/vllm/forward_context.py +++ b/vllm/forward_context.py @@ -161,6 +161,17 @@ class DPMetadata: assert self.local_sizes is not None return self.local_sizes + # Get the cumulative tokens across sequence parallel ranks. + # In this case the input to the MoEs will be distributed w.r.t both + # DP and TP rank. + # When sp_size==1, this is just the cumulative num tokens across DP. + def cu_tokens_across_sp(self, sp_size: int) -> torch.Tensor: + num_tokens_across_sp_cpu = ( + self.num_tokens_across_dp_cpu - 1 + sp_size + ) // sp_size + num_tokens_across_sp_cpu = num_tokens_across_sp_cpu.repeat_interleave(sp_size) + return torch.cumsum(num_tokens_across_sp_cpu, dim=0) + @dataclass class ForwardContext: