mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-16 14:27:19 +08:00
updated
Signed-off-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
parent
e830434fe2
commit
9d762c3aa5
@ -2,6 +2,7 @@
|
|||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
import os
|
||||||
import time
|
import time
|
||||||
import pplx_kernels as pplx
|
import pplx_kernels as pplx
|
||||||
import torch
|
import torch
|
||||||
@ -210,6 +211,7 @@ class PplxPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
|
|||||||
bound_m=bound_m,
|
bound_m=bound_m,
|
||||||
)
|
)
|
||||||
end = time.perf_counter()
|
end = time.perf_counter()
|
||||||
|
if os.getenv("LOG_TIME") == "1":
|
||||||
logger.info("dispatch took %.3f ms", (end - start) * 1000)
|
logger.info("dispatch took %.3f ms", (end - start) * 1000)
|
||||||
|
|
||||||
if expert_x_scale is not None:
|
if expert_x_scale is not None:
|
||||||
@ -259,4 +261,5 @@ class PplxPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
|
|||||||
expert_y=fused_expert_output,
|
expert_y=fused_expert_output,
|
||||||
bound_m=bound_m)
|
bound_m=bound_m)
|
||||||
end = time.perf_counter()
|
end = time.perf_counter()
|
||||||
|
if os.getenv("LOG_TIME") == "1":
|
||||||
logger.info("combine took %.3f ms", (end - start) * 1000)
|
logger.info("combine took %.3f ms", (end - start) * 1000)
|
||||||
|
|||||||
@ -946,7 +946,6 @@ class DPEngineCoreProc(EngineCoreProc):
|
|||||||
|
|
||||||
# We are in a running state and so must execute a dummy pass
|
# We are in a running state and so must execute a dummy pass
|
||||||
# if the model didn't execute any ready requests.
|
# if the model didn't execute any ready requests.
|
||||||
logger.info("Executing dummy batch for wave %d.", self.current_wave)
|
|
||||||
self.execute_dummy_batch()
|
self.execute_dummy_batch()
|
||||||
|
|
||||||
# 3) All-reduce operation to determine global unfinished reqs.
|
# 3) All-reduce operation to determine global unfinished reqs.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user