diff --git a/vllm/distributed/eplb/rebalance_execute.py b/vllm/distributed/eplb/rebalance_execute.py index e37292362582d..4c0528bbd22bc 100644 --- a/vllm/distributed/eplb/rebalance_execute.py +++ b/vllm/distributed/eplb/rebalance_execute.py @@ -233,10 +233,7 @@ def move_to_buffer( recv_dst_rows[:recv_count] = dst_rows recv_primary_mask[dst_rows] = True - eligible_local_buffer_mask = np.logical_and( - np.logical_and(~is_unchanged, is_received_locally), - new_local_expert_ids != -1, - ) + eligible_local_buffer_mask = np.logical_and(~is_unchanged, is_received_locally) # 1. Local moves into tmp buffers if bool(eligible_local_buffer_mask.any()) and send_count > 0: @@ -396,7 +393,7 @@ def move_from_buffer( dest_indices = np.nonzero(dest_mask_np)[0].tolist() for dst in dest_indices: for w, b in zip(expert_weights, expert_weights_buffers): - w[dst].copy_(b[dst]) + w[dst].copy_(b[dst], non_blocking=True) # Duplicate remote received rows to non-primary duplicate dsts if recv_count == 0: @@ -433,7 +430,7 @@ def move_from_buffer( for dst, src in zip(matched_dst_rows.tolist(), matched_src_rows.tolist()): for w in expert_weights: - w[dst].copy_(w[src]) + w[dst].copy_(w[src], non_blocking=True) async def transfer_layer(