diff --git a/vllm/distributed/eplb/async_worker.py b/vllm/distributed/eplb/async_worker.py index 2648ada89abc9..b2a58ded19edb 100644 --- a/vllm/distributed/eplb/async_worker.py +++ b/vllm/distributed/eplb/async_worker.py @@ -109,12 +109,12 @@ async def transfer_run_periodically( for model_state in state.model_states.values(): if not model_state.is_async_enabled: continue - if not model_state.new_indices_computed: - run_rebalance_experts(model_state, state) - logger.info( - "Async worker computed new indices for model %s", - model_state.model_name, - ) + # Rebalance experts is done once, only when the async worker wakes up. + run_rebalance_experts(model_state, state) + logger.info( + "Async worker computed new indices for model %s", + model_state.model_name, + ) current_num_layers = model_state.model.num_moe_layers while ( diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py index ef5e58706722c..43c0bbdd72e44 100644 --- a/vllm/distributed/eplb/eplb_state.py +++ b/vllm/distributed/eplb/eplb_state.py @@ -197,10 +197,6 @@ class EplbModelState: """ Whether the async EPLB needs to poll peers for buffer readiness. """ - new_indices_computed: bool - """ - The flag indicates whether the new indices have been computed. - """ eplb_stats: EplbStats | None """ EPLB stats for the model. @@ -547,7 +543,6 @@ class EplbState: layer_to_transfer=0, rebalanced=False, pending_global_ready_check=False, - new_indices_computed=False, eplb_stats=None, is_unchanged=np.array([]), is_received_locally=np.array([]), @@ -928,7 +923,6 @@ class EplbState: eplb_model_state.rebalanced = True eplb_model_state.layer_to_transfer = 0 eplb_model_state.pending_global_ready_check = True - eplb_model_state.new_indices_computed = False # Signal async thread to start transferring layers if self.is_async and (not is_profile): self.rearrange_event.set()