mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-16 10:17:13 +08:00
hack 2
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
parent
45ea3c31a2
commit
37cf1f27f2
@ -1115,13 +1115,12 @@ def prepare_communication_buffer_for_model(model: torch.nn.Module):
|
||||
MoE all2all (DeepEP) usually allocates the communication buffer
|
||||
based on the model shape for optimal performance.
|
||||
"""
|
||||
gpus = list(range(torch.cuda.device_count()))
|
||||
orig = torch.cuda.current_device()
|
||||
for d in gpus:
|
||||
for d in range(8):
|
||||
torch.cuda.set_device(d)
|
||||
torch.zeros(1, device=f'cuda:{d}')
|
||||
torch.cuda.set_device(orig)
|
||||
print("pre-warmed all GPUs:", gpus)
|
||||
print("pre-warmed all GPUs")
|
||||
if _TP is not None:
|
||||
_TP.prepare_communication_buffer_for_model(model)
|
||||
if _PP is not None:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user