mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-02 10:04:28 +08:00
balanced_packing into numpy
Signed-off-by: ilmarkov <markovilya197@gmail.com>
This commit is contained in:
parent
389f86e0c5
commit
dcf4783967
@ -40,37 +40,36 @@ class DefaultEplbPolicy(AbstractEplbPolicy):
|
|||||||
groups_per_pack = num_groups // num_packs
|
groups_per_pack = num_groups // num_packs
|
||||||
|
|
||||||
if groups_per_pack == 1:
|
if groups_per_pack == 1:
|
||||||
pack_index_np = np.tile(
|
pack_index = np.tile(np.arange(num_groups, dtype=np.int64), (num_layers, 1))
|
||||||
np.arange(num_groups, dtype=np.int64), (num_layers, 1)
|
rank_in_pack = np.zeros_like(pack_index, dtype=np.int64)
|
||||||
)
|
return pack_index, rank_in_pack
|
||||||
rank_in_pack_np = np.zeros_like(pack_index_np, dtype=np.int64)
|
|
||||||
return pack_index_np, rank_in_pack_np
|
|
||||||
|
|
||||||
# Sort and get indices in descending order
|
# Sort and get indices in descending order
|
||||||
indices = np.argsort(-weight, axis=-1)
|
indices = np.argsort(-weight, axis=-1)
|
||||||
|
|
||||||
pack_index_np = np.full((num_layers, num_groups), -1, dtype=np.int64)
|
pack_index = np.full((num_layers, num_groups), -1, dtype=np.int64)
|
||||||
rank_in_pack_np = np.full((num_layers, num_groups), -1, dtype=np.int64)
|
rank_in_pack = np.full((num_layers, num_groups), -1, dtype=np.int64)
|
||||||
|
|
||||||
|
pack_weights = np.zeros((num_layers, num_packs), dtype=np.float64)
|
||||||
|
pack_items = np.zeros((num_layers, num_packs), dtype=np.int64)
|
||||||
|
|
||||||
# Run the packing algorithm
|
# Run the packing algorithm
|
||||||
for i in range(num_layers):
|
for layer_idx in range(num_layers):
|
||||||
pack_weights = [0.0] * num_packs
|
weights_row = pack_weights[layer_idx]
|
||||||
pack_items = [0] * num_packs
|
items_row = pack_items[layer_idx]
|
||||||
|
|
||||||
for group in indices[i]:
|
for group in indices[layer_idx]:
|
||||||
# Find a pack with capacity that has the lowest weight
|
# Select the lightest pack that still has capacity.
|
||||||
pack = min(
|
available = items_row < groups_per_pack
|
||||||
(j for j in range(num_packs) if pack_items[j] < groups_per_pack),
|
assert np.any(available)
|
||||||
key=pack_weights.__getitem__,
|
pack = int(np.argmin(np.where(available, weights_row, np.inf)))
|
||||||
)
|
|
||||||
|
|
||||||
assert pack_items[pack] < groups_per_pack
|
pack_index[layer_idx, group] = pack
|
||||||
pack_index_np[i, group] = pack
|
rank_in_pack[layer_idx, group] = items_row[pack]
|
||||||
rank_in_pack_np[i, group] = pack_items[pack]
|
weights_row[pack] += weight[layer_idx, group]
|
||||||
pack_weights[pack] += weight[i, group]
|
items_row[pack] += 1
|
||||||
pack_items[pack] += 1
|
|
||||||
|
|
||||||
return pack_index_np, rank_in_pack_np
|
return pack_index, rank_in_pack
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def replicate_experts(
|
def replicate_experts(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user