Refactor tests and address nits

Signed-off-by: ilmarkov <markovilya197@gmail.com>
2026-06-04 09:42:17 +08:00 · 2025-12-15 10:46:20 +00:00 · 2025-12-15 10:46:20 +00:00 · 7d0ab7d4b9
commit 7d0ab7d4b9
parent 9a41b9130e
2 changed files with 71 additions and 64 deletions
--- a/tests/distributed/test_eplb_algo.py
+++ b/tests/distributed/test_eplb_algo.py
@ -312,8 +312,8 @@ if __name__ == "__main__":
    test_basic_rebalance()
-def _make_phyrank_from_phy2log(phy2log: torch.Tensor) -> torch.Tensor:
+def _make_phy_replicas_idx_from_phy2log(phy2log: torch.Tensor) -> torch.Tensor:
-    """Create phyrank from phy2log"""
+    """Create replicas indices mapping from phy2log"""
    pr = torch.zeros_like(phy2log)
    for layer in range(phy2log.shape[0]):
        seen: dict[int, int] = {}
@ -328,9 +328,9 @@ def _make_phyrank_from_phy2log(phy2log: torch.Tensor) -> torch.Tensor:
 def _validate_intragpu_rearrangement(
    old_global_expert_indices: torch.Tensor,
    new_phy2log: torch.Tensor,
-    new_phyrank: torch.Tensor,
+    new_phy_replicas_idx: torch.Tensor,
    post_phy2log: torch.Tensor,
-    post_phyrank: torch.Tensor,
+    post_phy_replicas_idx: torch.Tensor,
    num_ranks: int,
    slots_per_gpu: int,
 ):
@ -340,9 +340,9 @@ def _validate_intragpu_rearrangement(
        end = start + slots_per_gpu
        old_seg = old_global_expert_indices[0, start:end]
        new_seg = new_phy2log[0, start:end]
-        new_rnk = new_phyrank[0, start:end]
+        new_rnk = new_phy_replicas_idx[0, start:end]
        post_seg = post_phy2log[0, start:end]
-        post_rnk = post_phyrank[0, start:end]
+        post_rnk = post_phy_replicas_idx[0, start:end]
        # Pairwise equality for (expert, rank) pairs to ensure nothing is lost
        def sorted_pairs(seg: torch.Tensor, rnk: torch.Tensor):
@ -376,70 +376,77 @@ def _validate_intragpu_rearrangement(
            )
-def test_preserve_intragpu_slots_simple():
+@pytest.mark.parametrize(
    "num_ranks, slots_per_gpu, old_phy2log, new_phy2log",
    [
        pytest.param(
            # Setup: 2 GPUs, 4 slots each, 1 layer
            # Old mapping: GPU0 -> [0,1,2,3], GPU1 -> [4,5,6,7]
            # New mapping shuffles within GPU0 and brings 4,5 into GPU0.
            # GPU0 new -> [1,5,0,4]; GPU1 new -> [6,2,7,3]
            2,
            4,
            torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7]]),
            torch.tensor([[1, 5, 0, 4, 6, 2, 7, 3]]),
            id="simple",
        ),
        pytest.param(
            # Setup: 2 GPUs, 5 slots each (total 10 physical experts), 1 layer
            # Old mapping:
            #   GPU0 -> [0, 1, 0, 2, 3]  (expert 0 duplicated)
            #   GPU1 -> [4, 5, 6, 1, 2]
            # New mapping reorders within GPUs and moves some experts across GPUs,
            # while still including duplicates:
            #   GPU0 new -> [0, 5, 4, 0, 1]  (expert 0 duplicated, 4/5 incoming)
            #   GPU1 new -> [6, 2, 3, 2, 1]  (expert 2 duplicated)
            2,
            5,
            torch.tensor([[0, 1, 0, 2, 3, 4, 5, 6, 1, 2]]),
            torch.tensor([[0, 5, 4, 0, 1, 6, 2, 3, 2, 1]]),
            id="duplicates",
        ),
        pytest.param(
            # Setup: 3 GPUs, 4 slots each (total 12 physical experts), 1 layer
            # Old mapping:
            #   GPU0 -> [0, 1, 2, 3]
            #   GPU1 -> [0, 1, 2, 3]
            #   GPU2 -> [0, 1, 2, 3]
            # New mapping decides to use one expert on 2 GPUs and shuffles
            # experts on the third GPU,
            #   GPU0 new -> [0, 0, 0, 0]
            #   GPU1 new -> [0, 0, 0, 0]
            #   GPU2 new -> [1, 2, 3, 0]
            3,
            4,
            torch.tensor([[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]]),
            torch.tensor([[0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0]]),
            id="skewed_expert",
        ),
    ],
 )
 def test_preserve_intragpu_slots(
    num_ranks: int,
    slots_per_gpu: int,
    old_phy2log: torch.Tensor,
    new_phy2log: torch.Tensor,
 ):
    """Experts that stay on a GPU keep their old slots; incoming not lost."""
-    # Setup: 2 GPUs, 4 slots each, 1 layer
+    phy_replicas_idx = _make_phy_replicas_idx_from_phy2log(new_phy2log)
    num_ranks = 2
    slots_per_gpu = 4
    # Old mapping: GPU0 -> [0,1,2,3], GPU1 -> [4,5,6,7]
    old_global_expert_indices = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7]])
    # New mapping shuffles within GPU0 and brings 4,5 into GPU0.
    # GPU0 new -> [1,5,0,4] (0 and 1 remain on GPU0 but at different slots)
    # GPU1 new -> [6,2,7,3] (6 and 7 remain on GPU1, 2 and 3 move in)
    phy2log = torch.tensor([[1, 5, 0, 4, 6, 2, 7, 3]])
    # Derive phyrank from replica occurrence order per expert
    phyrank = _make_phyrank_from_phy2log(phy2log)
-    post_phy2log, post_phyrank = DefaultEplbPolicy.preserve_intragpu_slots(
+    post_phy2log, post_phy_replicas_idx = DefaultEplbPolicy.preserve_intragpu_slots(
-        phy2log, phyrank, num_ranks, old_global_expert_indices
+        new_phy2log, phy_replicas_idx, num_ranks, old_phy2log
    )
    # Shapes preserved
-    assert post_phy2log.shape == phy2log.shape
+    assert post_phy2log.shape == new_phy2log.shape
-    assert post_phyrank.shape == phyrank.shape
+    assert post_phy_replicas_idx.shape == phy_replicas_idx.shape
    _validate_intragpu_rearrangement(
-        old_global_expert_indices,
+        old_phy2log,
-        phy2log,
+        new_phy2log,
-        phyrank,
+        phy_replicas_idx,
        post_phy2log,
-        post_phyrank,
+        post_phy_replicas_idx,
        num_ranks,
        slots_per_gpu,
    )
 def test_preserve_intragpu_slots_with_duplicates():
    """Test preserve intragpu slots with duplicates"""
    # Setup: 2 GPUs, 5 slots each (total 10 physical experts), 1 layer
    num_ranks = 2
    slots_per_gpu = 5
    # Old mapping:
    #   GPU0 -> [0, 1, 0, 2, 3]  (expert 0 duplicated)
    #   GPU1 -> [4, 5, 6, 1, 2]
    old_global_expert_indices = torch.tensor([[0, 1, 0, 2, 3, 4, 5, 6, 1, 2]])
    # New mapping reorders within GPUs and moves some experts across GPUs,
    # while still including duplicates:
    #   GPU0 new -> [0, 5, 4, 0, 1]  (expert 0 duplicated, 4/5 incoming)
    #   GPU1 new -> [6, 2, 3, 1, 2]  (expert 2 duplicated)
    phy2log = torch.tensor([[0, 5, 4, 0, 1, 6, 2, 3, 1, 2]])
    # Derive ranks so duplicates have ranks [0,1,...] by occurrence
    phyrank = _make_phyrank_from_phy2log(phy2log)
    post_phy2log, post_phyrank = DefaultEplbPolicy.preserve_intragpu_slots(
        phy2log, phyrank, num_ranks, old_global_expert_indices
    )
    # Shapes preserved
    assert post_phy2log.shape == phy2log.shape
    assert post_phyrank.shape == phyrank.shape
    _validate_intragpu_rearrangement(
        old_global_expert_indices,
        phy2log,
        phyrank,
        post_phy2log,
        post_phyrank,
        num_ranks,
        slots_per_gpu,
    )
--- a/vllm/distributed/eplb/policy/default.py
+++ b/vllm/distributed/eplb/policy/default.py
@ -271,8 +271,8 @@ class DefaultEplbPolicy(AbstractEplbPolicy):
                    preserved_positions[layer_indices, slot_idx] = True
            # Second pass: fill remaining slots with remaining new experts
-            remaining_mask = ~used_new_indices  # [L, S]
+            remaining_mask = ~used_new_indices  # [layers, slots]
-            fill_mask = ~preserved_positions  # [L, S]
+            fill_mask = ~preserved_positions  # [layers, slots]
            if remaining_mask.any() and fill_mask.any():
                idx_base = np.tile(np.arange(slots_per_gpu), (num_layers, 1))
                # Sentinel value for unavailable positions.