From d9291f998ed46cf6b4af833bbcdd5e14bd2d123d Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sun, 20 Jul 2025 17:04:35 +0000 Subject: [PATCH] cleanup Signed-off-by: Robert Shaw --- vllm/v1/metrics/loggers.py | 78 +++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index d8d9eb463f4ec..6aea6827e9a51 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -425,29 +425,32 @@ class PrometheusStatLogger(StatLoggerBase): self.histogram_decode_time_request = make_per_engine( histogram_decode_time_request, engine_indexes, model_name) - # # - # # LoRA metrics - # # + # + # LoRA metrics + # - # # TODO: This metric might be incorrect in case of using multiple - # # api_server counts which uses prometheus mp. - # self.gauge_lora_info: Optional[prometheus_client.Gauge] = None - # if vllm_config.lora_config is not None: - # self.labelname_max_lora = "max_lora" - # self.labelname_waiting_lora_adapters = "waiting_lora_adapters" - # self.labelname_running_lora_adapters = "running_lora_adapters" - # self.max_lora = vllm_config.lora_config.max_loras - # self.gauge_lora_info = \ - # self._gauge_cls( - # name="vllm:lora_requests_info", - # documentation="Running stats on lora requests.", - # multiprocess_mode="sum", - # labelnames=[ - # self.labelname_max_lora, - # self.labelname_waiting_lora_adapters, - # self.labelname_running_lora_adapters, - # ], - # ) + # TODO: This metric might be incorrect in case of using multiple + # api_server counts which uses prometheus mp. + self.gauge_lora_info: Optional[prometheus_client.Gauge] = None + if vllm_config.lora_config is not None: + if len(self.engine_indexes) > 1: + raise NotImplementedError( + "LoRA in DP mode is not supported yet.") + self.labelname_max_lora = "max_lora" + self.labelname_waiting_lora_adapters = "waiting_lora_adapters" + self.labelname_running_lora_adapters = "running_lora_adapters" + self.max_lora = vllm_config.lora_config.max_loras + self.gauge_lora_info = \ + self._gauge_cls( + name="vllm:lora_requests_info", + documentation="Running stats on lora requests.", + multiprocess_mode="sum", + labelnames=[ + self.labelname_max_lora, + self.labelname_waiting_lora_adapters, + self.labelname_running_lora_adapters, + ], + ) def log_metrics_info(self, type: str, config_obj: SupportsMetricsInfo): metrics_info = config_obj.metrics_info() @@ -471,7 +474,7 @@ class PrometheusStatLogger(StatLoggerBase): for engine_index in self.engine_indexes: metrics_info = config_obj.metrics_info() metrics_info["engine"] = str(engine_index) - info_gauge.labels(*metrics_info.set(1)) + info_gauge.labels(**metrics_info).set(1) def record(self, scheduler_stats: Optional[SchedulerStats], @@ -547,18 +550,21 @@ class PrometheusStatLogger(StatLoggerBase): self.histogram_max_tokens_request[engine_idx].observe( finished_request.max_tokens_param) - # if self.gauge_lora_info is not None: - # running_lora_adapters = \ - # ",".join(iteration_stats.running_lora_adapters.keys()) - # waiting_lora_adapters = \ - # ",".join(iteration_stats.waiting_lora_adapters.keys()) - # lora_info_labels = { - # self.labelname_running_lora_adapters: running_lora_adapters, - # self.labelname_waiting_lora_adapters: waiting_lora_adapters, - # self.labelname_max_lora: self.max_lora, - # } - # self.gauge_lora_info.labels(**lora_info_labels)\ - # .set_to_current_time() + # TODO: investigate whether LoRA works with DP and ensure + # that the metrics we are logging are consistent with + + if self.gauge_lora_info is not None: + running_lora_adapters = \ + ",".join(iteration_stats.running_lora_adapters.keys()) + waiting_lora_adapters = \ + ",".join(iteration_stats.waiting_lora_adapters.keys()) + lora_info_labels = { + self.labelname_running_lora_adapters: running_lora_adapters, + self.labelname_waiting_lora_adapters: waiting_lora_adapters, + self.labelname_max_lora: self.max_lora, + } + self.gauge_lora_info.labels(**lora_info_labels)\ + .set_to_current_time() def log_engine_initialized(self): self.log_metrics_info("cache_config", self.vllm_config.cache_config) @@ -666,6 +672,8 @@ class StatLoggerManager: logger.log() def log_engine_initialized(self): + self.prometheus_logger.log_engine_initialized() + for per_engine_loggers in self.per_engine_logger_dict.values(): for logger in per_engine_loggers: logger.log_engine_initialized()