[BugFix] Fix DBO failing with TypeError: 'NoneType' object is not iterable (#29698)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Lucas Wilkinson, 2025-11-28 23:19:01 -05:00 (committed by GitHub)
parent ca1b1e7296
commit e23f665d83
3 changed files with 7 additions and 7 deletions
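
Reading the diff below, the likely root cause is that the old _pad_out_ubatch_slice mutated ubatch_slices in place and had no return statement, so it implicitly returned None; a call site that rebinds the result and later iterates it then fails with the TypeError in the title. A minimal, self-contained sketch of that failure pattern (the plain tuples and the caller loop are illustrative stand-ins, not the actual vLLM types or call site):

def pad_out_ubatch_slice_old(ubatch_slices, num_total_tokens):
    # Mutates the list in place; there is no return statement, so the
    # function implicitly returns None.
    padded = slice(ubatch_slices[1][1].start, num_total_tokens)
    ubatch_slices[1] = (padded, padded)

slices = [(slice(0, 2), slice(0, 8)), (slice(2, 4), slice(8, 16))]
slices = pad_out_ubatch_slice_old(slices, 24)  # rebinds `slices` to None

for request_slice, token_slice in slices:
    # TypeError: 'NoneType' object is not iterable
    pass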

@@ -85,5 +85,4 @@ def test_dbo_dp_ep_gsm8k(all2all_backend: str, num_gpus_available):
    assert accuracy >= MIN_ACCURACY, (
        f"DBO+DP+EP accuracy too low ({all2all_backend}): "
        f"{accuracy:.3f} < {MIN_ACCURACY:.3f} "
        f"(correct: {results['num_correct']}/{results['num_questions']})"
    )

@@ -166,9 +166,7 @@ def _make_metadata_with_slice(
     assert start_locs[first_req] <= first_tok < start_locs[first_req + 1], (
         "Token slice start outside of first request"
     )
-    assert start_locs[last_req] <= last_tok < start_locs[last_req + 1], (
-        "Token slice end outside of last request"
-    )
+    # NOTE: last token can be outside of the last request if we have CG padding.
     # If the "middle" request has tokens in both ubatches, we have to split it.
     # If ubatch_slice is the first ubatch then we will be splitting the last
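
The dropped assertion required the last token of the slice to fall inside the last request, which no longer holds once CUDA-graph (CG) padding extends the token slice past the final request's tokens. Illustrative numbers only (start_locs, last_req, and last_tok below are made up to show when the old check would have fired on valid metadata):

start_locs = [0, 8, 16, 24]  # cumulative token start offset per request
last_req = 2                 # index of the last real request, covering tokens [16, 24)
last_tok = 27                # CG padding pushes the slice's last token index past 24
# The removed check would have failed here even though the padded slice is valid:
assert not (start_locs[last_req] <= last_tok < start_locs[last_req + 1])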

@@ -93,13 +93,16 @@ def _post_process_dp_padding(tensor: torch.Tensor, should_dp_pad: bool) -> torch
 # This just pads the second ubatch slice out to the total number of tokens
 # (num_tokens + padding) since we do `create_ubatch_slices` before applying DP padding.
-def _pad_out_ubatch_slice(ubatch_slices: UBatchSlices, num_total_tokens: int):
-    padded_second_ubatch_slice = slice(
+def _pad_out_ubatch_slice(
+    ubatch_slices: UBatchSlices, num_total_tokens: int
+) -> UBatchSlices:
+    padded_second_token_slice = slice(
         ubatch_slices[1].token_slice.start, num_total_tokens
     )
     ubatch_slices[1] = UBatchSlice(
-        padded_second_ubatch_slice, padded_second_ubatch_slice
+        ubatch_slices[1].request_slice, padded_second_token_slice
     )
+    return ubatch_slices
 def _synchronize_dp_ranks(
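
Putting the new hunk together: the fixed helper keeps the second ubatch's original request_slice, only widens its token_slice out to the padded total, and returns the list so call sites that rebind the result no longer get None. A runnable sketch, assuming UBatchSlice can be modeled as a tiny dataclass with request_slice and token_slice fields (a stand-in, not vLLM's actual class):

from dataclasses import dataclass


@dataclass
class UBatchSlice:  # stand-in for vLLM's UBatchSlice
    request_slice: slice
    token_slice: slice


UBatchSlices = list[UBatchSlice]


def _pad_out_ubatch_slice(
    ubatch_slices: UBatchSlices, num_total_tokens: int
) -> UBatchSlices:
    # Widen only the token slice of the second ubatch out to the padded total;
    # keep its request slice intact rather than reusing the token slice for both.
    padded_second_token_slice = slice(
        ubatch_slices[1].token_slice.start, num_total_tokens
    )
    ubatch_slices[1] = UBatchSlice(
        ubatch_slices[1].request_slice, padded_second_token_slice
    )
    return ubatch_slices


# Example: the second ubatch originally covers tokens [8, 16); after padding the
# total token count to 24, its token slice becomes [8, 24) while its request
# slice stays [2, 4).
slices = [
    UBatchSlice(slice(0, 2), slice(0, 8)),
    UBatchSlice(slice(2, 4), slice(8, 16)),
]
slices = _pad_out_ubatch_slice(slices, 24)
assert slices[1].token_slice == slice(8, 24)
assert slices[1].request_slice == slice(2, 4)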