From 0d37450eb7e38ac82df35d4e0f21d4254435049d Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Thu, 9 Oct 2025 10:13:56 -0700 Subject: [PATCH] [BUGFIX] Add cu_tokens_across_sp to DPMetadata (#26457) Signed-off-by: Sage Moore --- vllm/forward_context.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/vllm/forward_context.py b/vllm/forward_context.py index a6a1e36bfe953..09da1398b0309 100644 --- a/vllm/forward_context.py +++ b/vllm/forward_context.py @@ -161,6 +161,17 @@ class DPMetadata: assert self.local_sizes is not None return self.local_sizes + # Get the cumulative tokens across sequence parallel ranks. + # In this case the input to the MoEs will be distributed w.r.t both + # DP and TP rank. + # When sp_size==1, this is just the cumulative num tokens across DP. + def cu_tokens_across_sp(self, sp_size: int) -> torch.Tensor: + num_tokens_across_sp_cpu = ( + self.num_tokens_across_dp_cpu - 1 + sp_size + ) // sp_size + num_tokens_across_sp_cpu = num_tokens_across_sp_cpu.repeat_interleave(sp_size) + return torch.cumsum(num_tokens_across_sp_cpu, dim=0) + @dataclass class ForwardContext: