From b4628728043a63f3574a309ed36f146c4cff12b3 Mon Sep 17 00:00:00 2001 From: ilmarkov Date: Tue, 9 Dec 2025 12:21:31 +0000 Subject: [PATCH] Fix pre-commit Signed-off-by: ilmarkov --- vllm/distributed/eplb/policy/abstract.py | 5 ++++- vllm/distributed/eplb/policy/default.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/vllm/distributed/eplb/policy/abstract.py b/vllm/distributed/eplb/policy/abstract.py index 40ed621c84892..f4435f11bd57b 100644 --- a/vllm/distributed/eplb/policy/abstract.py +++ b/vllm/distributed/eplb/policy/abstract.py @@ -16,6 +16,7 @@ class AbstractEplbPolicy(ABC): num_groups: int, num_nodes: int, num_ranks: int, + old_global_expert_indices: torch.Tensor | None = None, ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Entry point for expert-parallelism load balancer. @@ -28,7 +29,9 @@ class AbstractEplbPolicy(ABC): num_groups: number of expert groups num_nodes: number of server nodes num_ranks: number of ranks, must be a multiple of `num_nodes` - + old_global_expert_indices: [layers, num_logical_experts], the old global + expert indices. Used to avoid unnecessary weight copying + for experts moving within one rank. Returns: physical_to_logical_map: [layers, num_replicas], the expert index of each replica diff --git a/vllm/distributed/eplb/policy/default.py b/vllm/distributed/eplb/policy/default.py index 82fd1b94acaea..970a1614933ee 100644 --- a/vllm/distributed/eplb/policy/default.py +++ b/vllm/distributed/eplb/policy/default.py @@ -328,7 +328,9 @@ class DefaultEplbPolicy(AbstractEplbPolicy): num_nodes: number of server nodes, where the intra-node network (e.g, NVLink) is faster num_ranks: number of ranks, must be a multiple of `num_nodes` - + old_global_expert_indices: [layers, num_logical_experts], the old global + expert indices. Used to avoid unnecessary weight copying + for experts moving within one rank. Returns: phy2log: [layers, num_replicas], the expert index of each replica