[Logs] Optimize startup logs 4 (#29903)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Wentao Ye 2025-12-13 16:12:53 -05:00 committed by GitHub
parent 7c16f3fbcc
commit 6e78ed6ba7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 20 additions and 15 deletions

View File

@@ -885,12 +885,11 @@ def get_moe_configs(
# If no optimized configuration is available, we will use the default
# configuration
logger.warning(
(
"Using default MoE config. Performance might be sub-optimal! "
"Config file not found at %s"
),
config_file_paths,
logger.warning_once(
"Using default MoE config. Performance might be sub-optimal! "
"Config file not found at %s",
", ".join(config_file_paths),
scope="local",
)
return None

View File

@@ -369,7 +369,9 @@ class FusedMoE(CustomOp):
# aux_stream() returns None on non-cuda-alike platforms.
self.shared_experts_stream = aux_stream()
if self.shared_experts_stream is not None:
logger.info_once("Enabled separate cuda stream for MoE shared_experts")
logger.info_once(
"Enabled separate cuda stream for MoE shared_experts", scope="local"
)
if params_dtype is None:
params_dtype = torch.get_default_dtype()

View File

@@ -409,10 +409,11 @@ class CudaPlatformBase(Platform):
)
selected_index = sorted_indices[0]
selected_backend = valid_backends_priorities[selected_index][0]
logger.info(
logger.info_once(
"Using %s attention backend out of potential backends: %s",
selected_backend.name,
[b[0].name for b in valid_backends_priorities],
tuple(b[0].name for b in valid_backends_priorities),
scope="local",
)
return selected_backend.get_path()

View File

@@ -61,7 +61,7 @@ class WorkerProfiler(ABC):
"""Call _stop with error handling but no safeguards."""
try:
self._stop()
logger.info("Profiler stopped successfully.")
logger.info_once("Profiler stopped successfully.", scope="local")
except Exception as e:
logger.warning("Failed to stop profiler: %s", e)
self._running = False # Always mark as not running, assume stop worked
@@ -91,7 +91,7 @@ class WorkerProfiler(ABC):
and self._delay_iters > 0
and self._active_iteration_count == self._delay_iters
):
logger.info("Starting profiler after delay...")
logger.info_once("Starting profiler after delay...", scope="local")
self._call_start()
if self._running:
@@ -105,7 +105,9 @@ class WorkerProfiler(ABC):
# Automatically stop the profiler after max iters
# will be marked as not running, but leave as active so that stop
# can clean up properly
logger.info("Max profiling iterations reached. Stopping profiler...")
logger.info_once(
"Max profiling iterations reached. Stopping profiler...", scope="local"
)
self._call_stop()
return
@@ -125,7 +127,7 @@ class WorkerProfiler(ABC):
def shutdown(self) -> None:
"""Ensure profiler is stopped when shutting down."""
logger.info_once("Shutting down profiler")
logger.info_once("Shutting down profiler", scope="local")
if self._running:
self.stop()
@@ -156,9 +158,10 @@ class TorchProfilerWrapper(WorkerProfiler):
self.profiler_config = profiler_config
torch_profiler_trace_dir = profiler_config.torch_profiler_dir
if local_rank in (None, 0):
logger.info(
logger.info_once(
"Torch profiling enabled. Traces will be saved to: %s",
torch_profiler_trace_dir,
scope="local",
)
logger.debug(
"Profiler config: record_shapes=%s,"

View File

@@ -706,7 +706,7 @@ class WorkerProc:
death_pipe.recv()
except EOFError:
# Parent process has exited, terminate this worker
logger.info("Parent process exited, terminating worker")
logger.info_once("Parent process exited, terminating worker")
# Send signal to self to trigger clean shutdown
shutdown_event.set()
except Exception as e: