From 5ea4fa206d8e08d184174ac3d465975a5cb01489 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sun, 20 Jul 2025 17:17:15 +0000 Subject: [PATCH] Re-enable spec decoding Prometheus metrics; reject >1 EngineCore only when spec decoding is configured Signed-off-by: Robert Shaw --- vllm/v1/metrics/loggers.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index 2a76e9234f5ac..d4de767855d35 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -168,7 +168,8 @@ class PrometheusStatLogger(StatLoggerBase): model_name = vllm_config.model_config.served_model_name max_model_len = vllm_config.model_config.max_model_len - if len(self.engine_indexes) > 1: + if (len(self.engine_indexes) > 1 + and vllm_config.speculative_config is not None): raise NotImplementedError("Prometheus metrics with Spec Decoding " "with >1 EngineCore per AsyncLLM is not " "supported yet.") @@ -511,9 +512,9 @@ class PrometheusStatLogger(StatLoggerBase): self.counter_prefix_cache_hits[engine_idx].inc( scheduler_stats.prefix_cache_stats.hits) - # if scheduler_stats.spec_decoding_stats is not None: - # self.spec_decoding_prom.observe( - # scheduler_stats.spec_decoding_stats) + if scheduler_stats.spec_decoding_stats is not None: + self.spec_decoding_prom.observe( + scheduler_stats.spec_decoding_stats) if iteration_stats is None: return