From f7b899202088418ad3bc0b04701662acdee8c5f5 Mon Sep 17 00:00:00 2001
From: ilmarkov <markovilya197@gmail.com>
Date: Thu, 18 Dec 2025 15:10:43 +0000
Subject: [PATCH] Keep memory copy blocking for elastic EP case

Signed-off-by: ilmarkov <markovilya197@gmail.com>
---
 vllm/distributed/eplb/eplb_state.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py
index 4e3fdad8bb9cc..d1f0e181c5354 100644
--- a/vllm/distributed/eplb/eplb_state.py
+++ b/vllm/distributed/eplb/eplb_state.py
@@ -954,10 +954,10 @@ class EplbState:
 
         target_device = model_state.physical_to_logical_map.device
         new_physical = model_state.new_physical_to_logical_map
+        # To avoid a race condition with the async EPLB worker, the copy
+        # must be blocking when the EP size has changed.
         if model_state.physical_to_logical_map.shape[1] != new_physical.shape[1]:
-            model_state.physical_to_logical_map = new_physical.to(
-                target_device, non_blocking=True
-            )
+            model_state.physical_to_logical_map = new_physical.to(target_device)
         else:
             model_state.physical_to_logical_map[layer].copy_(
                 new_physical[layer].to(target_device, non_blocking=True)