mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-04 09:42:17 +08:00
Refactor tests and address nits
Signed-off-by: ilmarkov <markovilya197@gmail.com>
This commit is contained in:
parent
9a41b9130e
commit
7d0ab7d4b9
@ -312,8 +312,8 @@ if __name__ == "__main__":
|
|||||||
test_basic_rebalance()
|
test_basic_rebalance()
|
||||||
|
|
||||||
|
|
||||||
def _make_phyrank_from_phy2log(phy2log: torch.Tensor) -> torch.Tensor:
|
def _make_phy_replicas_idx_from_phy2log(phy2log: torch.Tensor) -> torch.Tensor:
|
||||||
"""Create phyrank from phy2log"""
|
"""Create replicas indices mapping from phy2log"""
|
||||||
pr = torch.zeros_like(phy2log)
|
pr = torch.zeros_like(phy2log)
|
||||||
for layer in range(phy2log.shape[0]):
|
for layer in range(phy2log.shape[0]):
|
||||||
seen: dict[int, int] = {}
|
seen: dict[int, int] = {}
|
||||||
@ -328,9 +328,9 @@ def _make_phyrank_from_phy2log(phy2log: torch.Tensor) -> torch.Tensor:
|
|||||||
def _validate_intragpu_rearrangement(
|
def _validate_intragpu_rearrangement(
|
||||||
old_global_expert_indices: torch.Tensor,
|
old_global_expert_indices: torch.Tensor,
|
||||||
new_phy2log: torch.Tensor,
|
new_phy2log: torch.Tensor,
|
||||||
new_phyrank: torch.Tensor,
|
new_phy_replicas_idx: torch.Tensor,
|
||||||
post_phy2log: torch.Tensor,
|
post_phy2log: torch.Tensor,
|
||||||
post_phyrank: torch.Tensor,
|
post_phy_replicas_idx: torch.Tensor,
|
||||||
num_ranks: int,
|
num_ranks: int,
|
||||||
slots_per_gpu: int,
|
slots_per_gpu: int,
|
||||||
):
|
):
|
||||||
@ -340,9 +340,9 @@ def _validate_intragpu_rearrangement(
|
|||||||
end = start + slots_per_gpu
|
end = start + slots_per_gpu
|
||||||
old_seg = old_global_expert_indices[0, start:end]
|
old_seg = old_global_expert_indices[0, start:end]
|
||||||
new_seg = new_phy2log[0, start:end]
|
new_seg = new_phy2log[0, start:end]
|
||||||
new_rnk = new_phyrank[0, start:end]
|
new_rnk = new_phy_replicas_idx[0, start:end]
|
||||||
post_seg = post_phy2log[0, start:end]
|
post_seg = post_phy2log[0, start:end]
|
||||||
post_rnk = post_phyrank[0, start:end]
|
post_rnk = post_phy_replicas_idx[0, start:end]
|
||||||
|
|
||||||
# Pairwise equality for (expert, rank) pairs to ensure nothing is lost
|
# Pairwise equality for (expert, rank) pairs to ensure nothing is lost
|
||||||
def sorted_pairs(seg: torch.Tensor, rnk: torch.Tensor):
|
def sorted_pairs(seg: torch.Tensor, rnk: torch.Tensor):
|
||||||
@ -376,70 +376,77 @@ def _validate_intragpu_rearrangement(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_preserve_intragpu_slots_simple():
|
@pytest.mark.parametrize(
|
||||||
|
"num_ranks, slots_per_gpu, old_phy2log, new_phy2log",
|
||||||
|
[
|
||||||
|
pytest.param(
|
||||||
|
# Setup: 2 GPUs, 4 slots each, 1 layer
|
||||||
|
# Old mapping: GPU0 -> [0,1,2,3], GPU1 -> [4,5,6,7]
|
||||||
|
# New mapping shuffles within GPU0 and brings 4,5 into GPU0.
|
||||||
|
# GPU0 new -> [1,5,0,4]; GPU1 new -> [6,2,7,3]
|
||||||
|
2,
|
||||||
|
4,
|
||||||
|
torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7]]),
|
||||||
|
torch.tensor([[1, 5, 0, 4, 6, 2, 7, 3]]),
|
||||||
|
id="simple",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
# Setup: 2 GPUs, 5 slots each (total 10 physical experts), 1 layer
|
||||||
|
# Old mapping:
|
||||||
|
# GPU0 -> [0, 1, 0, 2, 3] (expert 0 duplicated)
|
||||||
|
# GPU1 -> [4, 5, 6, 1, 2]
|
||||||
|
# New mapping reorders within GPUs and moves some experts across GPUs,
|
||||||
|
# while still including duplicates:
|
||||||
|
# GPU0 new -> [0, 5, 4, 0, 1] (expert 0 duplicated, 4/5 incoming)
|
||||||
|
# GPU1 new -> [6, 2, 3, 2, 1] (expert 2 duplicated)
|
||||||
|
2,
|
||||||
|
5,
|
||||||
|
torch.tensor([[0, 1, 0, 2, 3, 4, 5, 6, 1, 2]]),
|
||||||
|
torch.tensor([[0, 5, 4, 0, 1, 6, 2, 3, 2, 1]]),
|
||||||
|
id="duplicates",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
# Setup: 3 GPUs, 4 slots each (total 12 physical experts), 1 layer
|
||||||
|
# Old mapping:
|
||||||
|
# GPU0 -> [0, 1, 2, 3]
|
||||||
|
# GPU1 -> [0, 1, 2, 3]
|
||||||
|
# GPU2 -> [0, 1, 2, 3]
|
||||||
|
# New mapping decides to use one expert on 2 GPUs and shuffles
|
||||||
|
# experts on the third GPU,
|
||||||
|
# GPU0 new -> [0, 0, 0, 0]
|
||||||
|
# GPU1 new -> [0, 0, 0, 0]
|
||||||
|
# GPU2 new -> [1, 2, 3, 0]
|
||||||
|
3,
|
||||||
|
4,
|
||||||
|
torch.tensor([[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]]),
|
||||||
|
torch.tensor([[0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0]]),
|
||||||
|
id="skewed_expert",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_preserve_intragpu_slots(
|
||||||
|
num_ranks: int,
|
||||||
|
slots_per_gpu: int,
|
||||||
|
old_phy2log: torch.Tensor,
|
||||||
|
new_phy2log: torch.Tensor,
|
||||||
|
):
|
||||||
"""Experts that stay on a GPU keep their old slots; incoming not lost."""
|
"""Experts that stay on a GPU keep their old slots; incoming not lost."""
|
||||||
# Setup: 2 GPUs, 4 slots each, 1 layer
|
phy_replicas_idx = _make_phy_replicas_idx_from_phy2log(new_phy2log)
|
||||||
num_ranks = 2
|
|
||||||
slots_per_gpu = 4
|
|
||||||
# Old mapping: GPU0 -> [0,1,2,3], GPU1 -> [4,5,6,7]
|
|
||||||
old_global_expert_indices = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7]])
|
|
||||||
# New mapping shuffles within GPU0 and brings 4,5 into GPU0.
|
|
||||||
# GPU0 new -> [1,5,0,4] (0 and 1 remain on GPU0 but at different slots)
|
|
||||||
# GPU1 new -> [6,2,7,3] (6 and 7 remain on GPU1, 2 and 3 move in)
|
|
||||||
phy2log = torch.tensor([[1, 5, 0, 4, 6, 2, 7, 3]])
|
|
||||||
# Derive phyrank from replica occurrence order per expert
|
|
||||||
phyrank = _make_phyrank_from_phy2log(phy2log)
|
|
||||||
|
|
||||||
post_phy2log, post_phyrank = DefaultEplbPolicy.preserve_intragpu_slots(
|
post_phy2log, post_phy_replicas_idx = DefaultEplbPolicy.preserve_intragpu_slots(
|
||||||
phy2log, phyrank, num_ranks, old_global_expert_indices
|
new_phy2log, phy_replicas_idx, num_ranks, old_phy2log
|
||||||
)
|
)
|
||||||
|
|
||||||
# Shapes preserved
|
# Shapes preserved
|
||||||
assert post_phy2log.shape == phy2log.shape
|
assert post_phy2log.shape == new_phy2log.shape
|
||||||
assert post_phyrank.shape == phyrank.shape
|
assert post_phy_replicas_idx.shape == phy_replicas_idx.shape
|
||||||
|
|
||||||
_validate_intragpu_rearrangement(
|
_validate_intragpu_rearrangement(
|
||||||
old_global_expert_indices,
|
old_phy2log,
|
||||||
phy2log,
|
new_phy2log,
|
||||||
phyrank,
|
phy_replicas_idx,
|
||||||
post_phy2log,
|
post_phy2log,
|
||||||
post_phyrank,
|
post_phy_replicas_idx,
|
||||||
num_ranks,
|
|
||||||
slots_per_gpu,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_preserve_intragpu_slots_with_duplicates():
|
|
||||||
"""Test preserve intragpu slots with duplicates"""
|
|
||||||
# Setup: 2 GPUs, 5 slots each (total 10 physical experts), 1 layer
|
|
||||||
num_ranks = 2
|
|
||||||
slots_per_gpu = 5
|
|
||||||
# Old mapping:
|
|
||||||
# GPU0 -> [0, 1, 0, 2, 3] (expert 0 duplicated)
|
|
||||||
# GPU1 -> [4, 5, 6, 1, 2]
|
|
||||||
old_global_expert_indices = torch.tensor([[0, 1, 0, 2, 3, 4, 5, 6, 1, 2]])
|
|
||||||
# New mapping reorders within GPUs and moves some experts across GPUs,
|
|
||||||
# while still including duplicates:
|
|
||||||
# GPU0 new -> [0, 5, 4, 0, 1] (expert 0 duplicated, 4/5 incoming)
|
|
||||||
# GPU1 new -> [6, 2, 3, 1, 2] (expert 2 duplicated)
|
|
||||||
phy2log = torch.tensor([[0, 5, 4, 0, 1, 6, 2, 3, 1, 2]])
|
|
||||||
# Derive ranks so duplicates have ranks [0,1,...] by occurrence
|
|
||||||
phyrank = _make_phyrank_from_phy2log(phy2log)
|
|
||||||
|
|
||||||
post_phy2log, post_phyrank = DefaultEplbPolicy.preserve_intragpu_slots(
|
|
||||||
phy2log, phyrank, num_ranks, old_global_expert_indices
|
|
||||||
)
|
|
||||||
|
|
||||||
# Shapes preserved
|
|
||||||
assert post_phy2log.shape == phy2log.shape
|
|
||||||
assert post_phyrank.shape == phyrank.shape
|
|
||||||
|
|
||||||
_validate_intragpu_rearrangement(
|
|
||||||
old_global_expert_indices,
|
|
||||||
phy2log,
|
|
||||||
phyrank,
|
|
||||||
post_phy2log,
|
|
||||||
post_phyrank,
|
|
||||||
num_ranks,
|
num_ranks,
|
||||||
slots_per_gpu,
|
slots_per_gpu,
|
||||||
)
|
)
|
||||||
|
|||||||
@ -271,8 +271,8 @@ class DefaultEplbPolicy(AbstractEplbPolicy):
|
|||||||
preserved_positions[layer_indices, slot_idx] = True
|
preserved_positions[layer_indices, slot_idx] = True
|
||||||
|
|
||||||
# Second pass: fill remaining slots with remaining new experts
|
# Second pass: fill remaining slots with remaining new experts
|
||||||
remaining_mask = ~used_new_indices # [L, S]
|
remaining_mask = ~used_new_indices # [layers, slots]
|
||||||
fill_mask = ~preserved_positions # [L, S]
|
fill_mask = ~preserved_positions # [layers, slots]
|
||||||
if remaining_mask.any() and fill_mask.any():
|
if remaining_mask.any() and fill_mask.any():
|
||||||
idx_base = np.tile(np.arange(slots_per_gpu), (num_layers, 1))
|
idx_base = np.tile(np.arange(slots_per_gpu), (num_layers, 1))
|
||||||
# Sentinel value for unavailable positions.
|
# Sentinel value for unavailable positions.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user