mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-31 21:57:43 +08:00
turn yields on for pplx
Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
parent
18f7bfb501
commit
ce3ef95c11
@@ -134,14 +134,14 @@ class PplxPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
  do_recv=not send,
  )

- # yield_and_switch_from_compute_to_comm_impl(schedule="default")
+ yield_and_switch_from_compute_to_comm_impl(schedule="default")
  dispatch(True) # Send
  # torch.cuda.synchronize()
  # print(f"{ubatch_id} AFTER SEND SYNC", flush=True)
  dispatch(False) # Recv
  # torch.cuda.synchronize()
  # print(f"{ubatch_id} AFTER RECV SYNC", flush=True)
- # yield_and_switch_from_comm_to_compute_impl(schedule="default")
+ yield_and_switch_from_comm_to_compute_impl(schedule="default")
  # torch.cuda.synchronize()
  if expert_x_scale is not None:
  expert_x_scale = expert_x_scale[:, :, 0:1]
@@ -185,11 +185,11 @@ class PplxPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
  do_recv=not send,
  )

- # yield_and_switch_from_compute_to_comm_impl(schedule="default")
+ yield_and_switch_from_compute_to_comm_impl(schedule="default")
  combine(True)
  # torch.cuda.synchronize()
  # print(f"{ubatch_id} AFTER COMBINE SEND SYNC", flush=True)
  combine(False)
  # print(f"{ubatch_id} AFTER COMBINE RECV SYNC", flush=True)
- # yield_and_switch_from_comm_to_compute_impl(schedule="default")
+ yield_and_switch_from_comm_to_compute_impl(schedule="default")
  # torch.cuda.synchronize()
|
|||||||
Loading…
x
Reference in New Issue
Block a user