commit 37cf1f27f2
parent 45ea3c31a2
Author: Tyler Michael Smith
Date: 2025-07-10 18:56:08 -04:00
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>


@@ -1115,13 +1115,12 @@ def prepare_communication_buffer_for_model(model: torch.nn.Module):
     MoE all2all (DeepEP) usually allocate the communication buffer
     based on the model shape for optimal performance.
     """
-    gpus = list(range(torch.cuda.device_count()))
     orig = torch.cuda.current_device()
-    for d in gpus:
+    for d in range(8):
         torch.cuda.set_device(d)
         torch.zeros(1, device=f'cuda:{d}')
     torch.cuda.set_device(orig)
-    print("pre-warmed all GPUs:", gpus)
+    print("pre-warmed all GPUs")
     if _TP is not None:
         _TP.prepare_communication_buffer_for_model(model)
     if _PP is not None:
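
For context, the loop this hunk touches pre-warms each CUDA device by forcing context creation before the communication buffers (e.g. for DeepEP all2all) are allocated. Below is a minimal standalone sketch of that pattern; the function name prewarm_all_gpus is hypothetical, and unlike this commit's hardcoded range(8) it queries the device count, as the removed line did.

import torch

def prewarm_all_gpus() -> None:
    # Force CUDA context creation on every visible device so later
    # buffer allocation does not pay first-touch initialization cost.
    # Sketch only: queries device count instead of assuming 8 GPUs.
    if not torch.cuda.is_available():
        return
    orig = torch.cuda.current_device()
    for d in range(torch.cuda.device_count()):
        torch.cuda.set_device(d)
        torch.zeros(1, device=f"cuda:{d}")  # tiny allocation triggers init
    torch.cuda.set_device(orig)  # restore the caller's current device
    print("pre-warmed all GPUs")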