mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-22 22:50:11 +08:00
Keep memory copy blocking for elastic EP case
Signed-off-by: ilmarkov <markovilya197@gmail.com>
This commit is contained in:
parent
67ff54997d
commit
f7b8992020
@ -954,10 +954,10 @@ class EplbState:
|
||||
|
||||
target_device = model_state.physical_to_logical_map.device
|
||||
new_physical = model_state.new_physical_to_logical_map
|
||||
# In order to avoid a race condition with the async EPLB worker,
|
||||
# we need to perform a blocking copy when the EP size has changed.
|
||||
if model_state.physical_to_logical_map.shape[1] != new_physical.shape[1]:
|
||||
model_state.physical_to_logical_map = new_physical.to(
|
||||
target_device, non_blocking=True
|
||||
)
|
||||
model_state.physical_to_logical_map = new_physical.to(target_device)
|
||||
else:
|
||||
model_state.physical_to_logical_map[layer].copy_(
|
||||
new_physical[layer].to(target_device, non_blocking=True)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user