Merge pull request #19 from robertgshaw2-redhat/fix-prometheus-logging

Improve code structure
This commit is contained in:
Robert Shaw 2025-07-20 12:53:23 -04:00 committed by GitHub
commit 5e6114df5d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 74 additions and 63 deletions

View File

@@ -36,10 +36,9 @@ from vllm.v1.engine.output_processor import (OutputProcessor,
from vllm.v1.engine.parallel_sampling import ParentRequest
from vllm.v1.engine.processor import Processor
from vllm.v1.executor.abstract import Executor
from vllm.v1.metrics.loggers import (PrometheusStatLogger, StatLoggerBase,
StatLoggerFactory, setup_default_loggers)
from vllm.v1.metrics.loggers import StatLoggerFactory, StatLoggerManager
from vllm.v1.metrics.prometheus import shutdown_prometheus
from vllm.v1.metrics.stats import IterationStats, SchedulerStats
from vllm.v1.metrics.stats import IterationStats
logger = init_logger(__name__)
@@ -103,12 +102,11 @@ class AsyncLLM(EngineClient):
engine_idxs = [
idx for idx in range(start_idx, start_idx + local_engines)
]
self.stat_loggers = setup_default_loggers(
self.logger_manager = StatLoggerManager(
vllm_config=vllm_config,
log_stats=self.log_stats,
engine_idxs=engine_idxs,
custom_stat_loggers=stat_loggers,
)
) if self.log_stats else None
# Tokenizer (+ ensure liveness if running in another process).
self.tokenizer = init_tokenizer_from_configs(
@@ -136,12 +134,8 @@ class AsyncLLM(EngineClient):
client_addresses=client_addresses,
client_index=client_index,
)
if self.stat_loggers:
# loggers, prom_logger
loggers, _ = self.stat_loggers
for per_engine_loggers in loggers.values():
for logger in per_engine_loggers:
logger.log_engine_initialized()
if self.logger_manager:
self.logger_manager.log_engine_initialized()
self.output_handler: Optional[asyncio.Task] = None
try:
# Start output handler eagerly if we are in the asyncio eventloop.
@@ -380,7 +374,7 @@ class AsyncLLM(EngineClient):
engine_core = self.engine_core
output_processor = self.output_processor
log_stats = self.log_stats
stat_loggers = self.stat_loggers if log_stats else None
logger_manager = self.logger_manager
async def output_handler():
try:
@@ -420,12 +414,12 @@ class AsyncLLM(EngineClient):
# 4) Logging.
# TODO(rob): make into a coroutine and launch it in
# background thread once Prometheus overhead is non-trivial.
if stat_loggers:
AsyncLLM._record_stats(
stat_loggers,
outputs.engine_index,
# NOTE: we do not use self.log
if logger_manager:
logger_manager.record(
scheduler_stats=outputs.scheduler_stats,
iteration_stats=iteration_stats,
engine_idx=outputs.engine_index,
)
except Exception as e:
logger.exception("AsyncLLM output_handler failed.")
@@ -442,26 +436,6 @@ class AsyncLLM(EngineClient):
if self.log_requests:
logger.info("Aborted request %s.", request_id)
@staticmethod
def _record_stats(
stat_loggers: tuple[dict[int, list[StatLoggerBase]],
PrometheusStatLogger],
engine_idx: int,
scheduler_stats: Optional[SchedulerStats],
iteration_stats: Optional[IterationStats],
):
"""static so that it can be used from the output_handler task
without a circular ref to AsyncLLM."""
per_engine_loggers, prom_logger = stat_loggers
for stat_logger in per_engine_loggers[engine_idx]:
stat_logger.record(engine_idx=engine_idx,
scheduler_stats=scheduler_stats,
iteration_stats=iteration_stats)
prom_logger.record(engine_idx=engine_idx,
scheduler_stats=scheduler_stats,
iteration_stats=iteration_stats)
async def encode(
self,
prompt: PromptType,

View File

@@ -600,32 +600,69 @@ def build_1_2_5_buckets(max_value: int) -> list[int]:
return build_buckets([1, 2, 5], max_value)
def setup_default_loggers(
vllm_config: VllmConfig,
log_stats: bool,
engine_idxs: list[int],
custom_stat_loggers: Optional[list[StatLoggerFactory]] = None,
) -> Optional[tuple[dict[int, list[StatLoggerBase]], PrometheusStatLogger]]:
"""Setup logging and prometheus metrics."""
if not log_stats:
return None
class StatLoggerManager:
"""
StatLoggerManager:
Logging happens at the level of the EngineCore (per scheduler).
* DP: >1 EngineCore per AsyncLLM - loggers for each EngineCore.
* With Local Logger, just make N copies for N EngineCores.
* With Prometheus, we need a single logger with N "labels"
factories: list[StatLoggerFactory]
if custom_stat_loggers is not None:
factories = custom_stat_loggers
else:
factories = []
if logger.isEnabledFor(logging.INFO):
factories.append(LoggingStatLogger)
This class abstracts away this implementation detail from
the AsyncLLM, allowing the AsyncLLM to just call .record()
and .log() to a simple interface.
"""
# engine_idx: Logger
stat_loggers: dict[int, list[StatLoggerBase]] = {}
for engine_idx in engine_idxs:
per_engine_stat_loggers: list[StatLoggerBase] = []
for logger_factory in factories:
per_engine_stat_loggers.append(
logger_factory(vllm_config, engine_idx))
stat_loggers[engine_idx] = per_engine_stat_loggers
def __init__(
self,
vllm_config: VllmConfig,
engine_idxs: Optional[list[int]] = None,
custom_stat_loggers: Optional[list[StatLoggerFactory]] = None,
):
self.engine_idxs = engine_idxs if engine_idxs else [0]
prom_stat_logger = PrometheusStatLogger(vllm_config, engine_idxs)
return stat_loggers, prom_stat_logger
factories: list[StatLoggerFactory]
if custom_stat_loggers is not None:
factories = custom_stat_loggers
else:
factories = []
if logger.isEnabledFor(logging.INFO):
factories.append(LoggingStatLogger)
# engine_idx: StatLogger
self.per_engine_logger_dict: dict[int, list[StatLoggerBase]] = {}
for engine_idx in self.engine_idxs:
loggers: list[StatLoggerBase] = []
for logger_factory in factories:
loggers.append(logger_factory(vllm_config, engine_idx))
self.per_engine_logger_dict[engine_idx] = loggers
# For Prometheus, need to share the metrics between EngineCores.
# Each EngineCore's metrics are expressed as a unique label.
self.prometheus_logger = PrometheusStatLogger(vllm_config, engine_idxs)
def record(
self,
scheduler_stats: Optional[SchedulerStats],
iteration_stats: Optional[IterationStats],
engine_idx: Optional[int] = None,
):
if engine_idx is None:
engine_idx = 0
per_engine_loggers = self.per_engine_logger_dict[engine_idx]
for logger in per_engine_loggers:
logger.record(scheduler_stats, iteration_stats, engine_idx)
self.prometheus_logger.record(scheduler_stats, iteration_stats,
engine_idx)
def log(self):
for per_engine_loggers in self.per_engine_logger_dict.values():
for logger in per_engine_loggers:
logger.log()
def log_engine_initialized(self):
for per_engine_loggers in self.per_engine_logger_dict.values():
for logger in per_engine_loggers:
logger.log_engine_initialized()