From f7b899202088418ad3bc0b04701662acdee8c5f5 Mon Sep 17 00:00:00 2001 From: ilmarkov Date: Thu, 18 Dec 2025 15:10:43 +0000 Subject: [PATCH] Keep memory copy blocking for elastic EP case Signed-off-by: ilmarkov --- vllm/distributed/eplb/eplb_state.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py index 4e3fdad8bb9cc..d1f0e181c5354 100644 --- a/vllm/distributed/eplb/eplb_state.py +++ b/vllm/distributed/eplb/eplb_state.py @@ -954,10 +954,10 @@ class EplbState: target_device = model_state.physical_to_logical_map.device new_physical = model_state.new_physical_to_logical_map + # To avoid a race condition with the async EPLB worker, + # the copy must be blocking when the EP size has changed. if model_state.physical_to_logical_map.shape[1] != new_physical.shape[1]: - model_state.physical_to_logical_map = new_physical.to( - target_device, non_blocking=True - ) + model_state.physical_to_logical_map = new_physical.to(target_device) else: model_state.physical_to_logical_map[layer].copy_( new_physical[layer].to(target_device, non_blocking=True)