Expose the cache configuration (CacheConfig) as a Prometheus Info metric.

  * vllm/config.py: add CacheConfig.metrics_info(), which returns every
    instance attribute as a dict of str -> str.
  * vllm/engine/metrics.py: import Info, register the `vllm:cache_config`
    Info metric in Metrics, and add StatLogger.info(), which fills that
    metric when called with type == "cache_config" (other values are
    ignored).
  * vllm/engine/llm_engine.py: publish self.cache_config through
    StatLogger.info() right after the StatLogger is created.

diff --git a/vllm/config.py b/vllm/config.py
index fc848b72d7f2..2f8883fe0733 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -308,6 +308,10 @@ class CacheConfig:
         self.num_gpu_blocks = None
         self.num_cpu_blocks = None
 
+    def metrics_info(self):
+        # convert cache_config to dict(key: str, value:str) for prometheus metrics info
+        return {key: str(value) for key, value in self.__dict__.items()}
+
     def _verify_args(self) -> None:
         if self.gpu_memory_utilization > 1.0:
             raise ValueError(
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index f0fd7efdef81..6f5af71426d7 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -138,6 +138,7 @@ class LLMEngine:
             self.stat_logger = StatLogger(
                 local_interval=_LOCAL_LOGGING_INTERVAL_SEC,
                 labels=dict(model_name=model_config.model))
+            self.stat_logger.info("cache_config", self.cache_config)
 
         self.forward_dag = None
         if USE_RAY_COMPILED_DAG:
diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py
index 83e66a937227..54b09c38f58a 100644
--- a/vllm/engine/metrics.py
+++ b/vllm/engine/metrics.py
@@ -1,5 +1,5 @@
 from vllm.logger import init_logger
-from prometheus_client import Counter, Gauge, Histogram, REGISTRY, disable_created_metrics
+from prometheus_client import Counter, Gauge, Histogram, Info, REGISTRY, disable_created_metrics
 
 import time
 import numpy as np
@@ -23,6 +23,10 @@ class Metrics:
             if hasattr(collector, "_name") and "vllm" in collector._name:
                 REGISTRY.unregister(collector)
 
+        self.info_cache_config = Info(
+            name='vllm:cache_config',
+            documentation='information of cache_config')
+
         # System stats
         self.gauge_scheduler_running = Gauge(
             name="vllm:num_requests_running",
@@ -128,6 +132,10 @@ class StatLogger:
         self.labels = labels
         self.metrics = Metrics(labelnames=list(labels.keys()))
 
+    def info(self, type: str, obj: object) -> None:
+        if type == "cache_config":
+            self.metrics.info_cache_config.info(obj.metrics_info())
+
     def _get_throughput(self, tracked_stats: List[int], now: float) -> float:
         return float(np.sum(tracked_stats) / (now - self.last_local_log))
 