Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
Tyler Michael Smith 2025-07-10 18:56:08 -04:00
parent 45ea3c31a2
commit 37cf1f27f2

View File

@@ -1115,13 +1115,12 @@ def prepare_communication_buffer_for_model(model: torch.nn.Module):
     MoE all2all (DeepEP) usually allocate the communication buffer
     based on the model shape for optimal performance.
     """
-    gpus = list(range(torch.cuda.device_count()))
     orig = torch.cuda.current_device()
-    for d in gpus:
+    for d in range(8):
         torch.cuda.set_device(d)
         torch.zeros(1, device=f'cuda:{d}')
     torch.cuda.set_device(orig)
-    print("pre-warmed all GPUs:", gpus)
+    print("pre-warmed all GPUs")
     if _TP is not None:
         _TP.prepare_communication_buffer_for_model(model)
     if _PP is not None: