mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-06 03:02:15 +08:00
Fix pre-commit
Signed-off-by: ilmarkov <markovilya197@gmail.com>
This commit is contained in:
parent
599648b6b7
commit
b462872804
@ -16,6 +16,7 @@ class AbstractEplbPolicy(ABC):
|
|||||||
num_groups: int,
|
num_groups: int,
|
||||||
num_nodes: int,
|
num_nodes: int,
|
||||||
num_ranks: int,
|
num_ranks: int,
|
||||||
|
old_global_expert_indices: torch.Tensor | None = None,
|
||||||
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
||||||
"""
|
"""
|
||||||
Entry point for expert-parallelism load balancer.
|
Entry point for expert-parallelism load balancer.
|
||||||
@ -28,7 +29,9 @@ class AbstractEplbPolicy(ABC):
|
|||||||
num_groups: number of expert groups
|
num_groups: number of expert groups
|
||||||
num_nodes: number of server nodes
|
num_nodes: number of server nodes
|
||||||
num_ranks: number of ranks, must be a multiple of `num_nodes`
|
num_ranks: number of ranks, must be a multiple of `num_nodes`
|
||||||
|
old_global_expert_indices: [layers, num_logical_experts], the old global
|
||||||
|
expert indices. Used to avoid unnecessary weight copying
|
||||||
|
for experts moving within one rank.
|
||||||
Returns:
|
Returns:
|
||||||
physical_to_logical_map: [layers, num_replicas], the expert
|
physical_to_logical_map: [layers, num_replicas], the expert
|
||||||
index of each replica
|
index of each replica
|
||||||
|
|||||||
@ -328,7 +328,9 @@ class DefaultEplbPolicy(AbstractEplbPolicy):
|
|||||||
num_nodes: number of server nodes, where the intra-node network
|
num_nodes: number of server nodes, where the intra-node network
|
||||||
(e.g., NVLink) is faster
|
(e.g., NVLink) is faster
|
||||||
num_ranks: number of ranks, must be a multiple of `num_nodes`
|
num_ranks: number of ranks, must be a multiple of `num_nodes`
|
||||||
|
old_global_expert_indices: [layers, num_logical_experts], the old global
|
||||||
|
expert indices. Used to avoid unnecessary weight copying
|
||||||
|
for experts moving within one rank.
|
||||||
Returns:
|
Returns:
|
||||||
phy2log: [layers, num_replicas], the expert
|
phy2log: [layers, num_replicas], the expert
|
||||||
index of each replica
|
index of each replica
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user