mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-26 22:23:12 +08:00
[V1] Simplify stats logging (#14082)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
2dfdfed8a0
commit
872db2be0e
@@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from collections.abc import AsyncGenerator, Mapping
|
||||
from typing import Optional, Union
|
||||
@@ -57,10 +58,9 @@ class AsyncLLM(EngineClient):
|
||||
self.log_stats = log_stats
|
||||
self.stat_loggers: list[StatLoggerBase] = []
|
||||
if self.log_stats:
|
||||
self.stat_loggers.extend([
|
||||
LoggingStatLogger(),
|
||||
PrometheusStatLogger(vllm_config),
|
||||
])
|
||||
if logger.isEnabledFor(logging.INFO):
|
||||
self.stat_loggers.append(LoggingStatLogger())
|
||||
self.stat_loggers.append(PrometheusStatLogger(vllm_config))
|
||||
|
||||
# Tokenizer (+ ensure liveness if running in another process).
|
||||
self.tokenizer = init_tokenizer_from_configs(
|
||||
@@ -287,7 +287,7 @@ class AsyncLLM(EngineClient):
|
||||
# 4) Logging.
|
||||
# TODO(rob): make into a coroutine and launch it in
|
||||
# background thread once Prometheus overhead is non-trivial.
|
||||
self._log_stats(
|
||||
self._record_stats(
|
||||
scheduler_stats=outputs.scheduler_stats,
|
||||
iteration_stats=iteration_stats,
|
||||
)
|
||||
@@ -306,7 +306,7 @@ class AsyncLLM(EngineClient):
|
||||
if self.log_requests:
|
||||
logger.info("Aborted request %s.", request_id)
|
||||
|
||||
def _log_stats(
|
||||
def _record_stats(
|
||||
self,
|
||||
scheduler_stats: Optional[SchedulerStats],
|
||||
iteration_stats: Optional[IterationStats],
|
||||
@@ -316,9 +316,9 @@ class AsyncLLM(EngineClient):
|
||||
|
||||
assert scheduler_stats is not None
|
||||
assert iteration_stats is not None
|
||||
for logger in self.stat_loggers:
|
||||
logger.log(scheduler_stats=scheduler_stats,
|
||||
iteration_stats=iteration_stats)
|
||||
for stat_logger in self.stat_loggers:
|
||||
stat_logger.record(scheduler_stats=scheduler_stats,
|
||||
iteration_stats=iteration_stats)
|
||||
|
||||
def encode(
|
||||
self,
|
||||
@@ -354,7 +354,8 @@ class AsyncLLM(EngineClient):
|
||||
scheduler_outputs=None,
|
||||
model_output=None,
|
||||
) -> None:
|
||||
logger.debug("Called do_log_stats.")
|
||||
for stat_logger in self.stat_loggers:
|
||||
stat_logger.log()
|
||||
|
||||
async def check_health(self) -> None:
|
||||
logger.debug("Called check_health.")
|
||||
|
||||
@@ -316,19 +316,10 @@ class EngineCoreProc(EngineCore):
|
||||
# Loop until process is sent a SIGINT or SIGTERM
|
||||
while True:
|
||||
# 1) Poll the input queue until there is work to do.
|
||||
if not self.scheduler.has_unfinished_requests():
|
||||
while True:
|
||||
try:
|
||||
req = self.input_queue.get(timeout=POLLING_TIMEOUT_S)
|
||||
self._handle_client_request(*req)
|
||||
break
|
||||
except queue.Empty:
|
||||
logger.debug("EngineCore busy loop waiting.")
|
||||
# Break out the loop so we can log_stats in step().
|
||||
if self.log_stats:
|
||||
break
|
||||
except BaseException:
|
||||
raise
|
||||
while not self.scheduler.has_unfinished_requests():
|
||||
logger.debug("EngineCore busy loop waiting.")
|
||||
req = self.input_queue.get()
|
||||
self._handle_client_request(*req)
|
||||
|
||||
# 2) Handle any new client requests.
|
||||
while not self.input_queue.empty():
|
||||
|
||||
@@ -21,15 +21,19 @@ _LOCAL_LOGGING_INTERVAL_SEC = 5.0
|
||||
class StatLoggerBase(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def log(self, scheduler_stats: SchedulerStats,
|
||||
iteration_stats: IterationStats):
|
||||
def record(self, scheduler_stats: SchedulerStats,
|
||||
iteration_stats: IterationStats):
|
||||
...
|
||||
|
||||
def log(self): # noqa
|
||||
pass
|
||||
|
||||
|
||||
class LoggingStatLogger(StatLoggerBase):
|
||||
|
||||
def __init__(self):
|
||||
self._reset(time.monotonic())
|
||||
self.last_scheduler_stats = SchedulerStats()
|
||||
|
||||
def _reset(self, now):
|
||||
self.last_log_time = now
|
||||
@@ -41,11 +45,6 @@ class LoggingStatLogger(StatLoggerBase):
|
||||
# Prefix cache metrics. TODO: Make the interval configurable.
|
||||
self.prefix_caching_metrics = PrefixCachingMetrics()
|
||||
|
||||
def _local_interval_elapsed(self, now: float) -> bool:
|
||||
# Log every _LOCAL_LOGGING_INTERVAL_SEC.
|
||||
elapsed_time = now - self.last_log_time
|
||||
return elapsed_time > _LOCAL_LOGGING_INTERVAL_SEC
|
||||
|
||||
def _track_iteration_stats(self, iteration_stats: IterationStats):
|
||||
# Save tracked stats for token counters.
|
||||
self.num_prompt_tokens.append(iteration_stats.num_prompt_tokens)
|
||||
@@ -56,24 +55,26 @@ class LoggingStatLogger(StatLoggerBase):
|
||||
# Compute summary metrics for tracked stats
|
||||
return float(np.sum(tracked_stats) / (now - self.last_log_time))
|
||||
|
||||
def log(self, scheduler_stats: SchedulerStats,
|
||||
iteration_stats: IterationStats):
|
||||
def record(self, scheduler_stats: SchedulerStats,
|
||||
iteration_stats: IterationStats):
|
||||
"""Log Stats to standard output."""
|
||||
|
||||
self._track_iteration_stats(iteration_stats)
|
||||
|
||||
self.prefix_caching_metrics.observe(scheduler_stats.prefix_cache_stats)
|
||||
|
||||
now = time.monotonic()
|
||||
if not self._local_interval_elapsed(now):
|
||||
return
|
||||
self.last_scheduler_stats = scheduler_stats
|
||||
|
||||
def log(self):
|
||||
now = time.monotonic()
|
||||
prompt_throughput = self._get_throughput(self.num_prompt_tokens, now)
|
||||
generation_throughput = self._get_throughput(
|
||||
self.num_generation_tokens, now)
|
||||
|
||||
self._reset(now)
|
||||
|
||||
scheduler_stats = self.last_scheduler_stats
|
||||
|
||||
# Format and print output.
|
||||
logger.info(
|
||||
"Avg prompt throughput: %.1f tokens/s, "
|
||||
@@ -274,8 +275,8 @@ class PrometheusStatLogger(StatLoggerBase):
|
||||
labelnames=metrics_info.keys()).labels(**metrics_info)
|
||||
info_gauge.set(1)
|
||||
|
||||
def log(self, scheduler_stats: SchedulerStats,
|
||||
iteration_stats: IterationStats):
|
||||
def record(self, scheduler_stats: SchedulerStats,
|
||||
iteration_stats: IterationStats):
|
||||
"""Log to prometheus."""
|
||||
self.gauge_scheduler_running.set(scheduler_stats.num_running_reqs)
|
||||
self.gauge_scheduler_waiting.set(scheduler_stats.num_waiting_reqs)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user