From 9d762c3aa579aa01e56aac397e10994f0c591188 Mon Sep 17 00:00:00 2001
From: Robert Shaw
Date: Tue, 15 Jul 2025 02:09:43 +0000
Subject: [PATCH] updated

Signed-off-by: Robert Shaw
---
 .../layers/fused_moe/pplx_prepare_finalize.py | 7 +++++--
 vllm/v1/engine/core.py                        | 1 -
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py
index f890ecae040e9..b074d1380ab43 100644
--- a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py
+++ b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from typing import Optional
+import os
 import time
 import pplx_kernels as pplx
 import torch
@@ -210,7 +211,8 @@ class PplxPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
             bound_m=bound_m,
         )
         end = time.perf_counter()
-        logger.info("dispatch took %.3f ms", (end - start) * 1000)
+        if os.getenv("LOG_TIME") == "1":
+            logger.info("dispatch took %.3f ms", (end - start) * 1000)
 
         if expert_x_scale is not None:
             expert_x_scale = expert_x_scale[:, :, :orig_a_scale_block_shape]
@@ -259,4 +261,5 @@ class PplxPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
                              expert_y=fused_expert_output,
                              bound_m=bound_m)
         end = time.perf_counter()
-        logger.info("combine took %.3f ms", (end - start) * 1000)
+        if os.getenv("LOG_TIME") == "1":
+            logger.info("combine took %.3f ms", (end - start) * 1000)
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 0e6e35690d644..e2fdf6f8a11c7 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -946,7 +946,6 @@ class DPEngineCoreProc(EngineCoreProc):
 
             # We are in a running state and so must execute a dummy pass
             # if the model didn't execute any ready requests.
-            logger.info("Executing dummy batch for wave %d.", self.current_wave)
             self.execute_dummy_batch()
 
             # 3) All-reduce operation to determine global unfinished reqs.