From e5192819208c4d68194844b7dfafbc00020d0dea Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Fri, 10 Oct 2025 17:00:50 +0100 Subject: [PATCH] [Metrics] Add test for multi-modal cache stats logging (#26588) Signed-off-by: Mark McLoughlin --- tests/entrypoints/llm/test_mm_cache_stats.py | 24 ++++++++++++++++++++ vllm/v1/metrics/loggers.py | 5 +--- vllm/v1/metrics/stats.py | 5 ++++ 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/tests/entrypoints/llm/test_mm_cache_stats.py b/tests/entrypoints/llm/test_mm_cache_stats.py index 62bfefddbe1de..e5ee99124409d 100644 --- a/tests/entrypoints/llm/test_mm_cache_stats.py +++ b/tests/entrypoints/llm/test_mm_cache_stats.py @@ -1,10 +1,14 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import logging + import pytest +import regex as re from vllm import LLM from vllm.entrypoints.chat_utils import ChatCompletionMessageParam +from vllm.v1.metrics import loggers as stat_loggers from vllm.v1.metrics.reader import Counter, Metric from ..openai.test_vision import TEST_IMAGE_ASSETS @@ -37,12 +41,27 @@ def _get_mm_cache_stats(metrics: list[Metric]): return mm_cache_queries, mm_cache_hits +def _get_mm_cache_log(llm: LLM, caplog_vllm: pytest.LogCaptureFixture) -> float: + caplog_vllm.clear() + with caplog_vllm.at_level(logging.INFO, logger=stat_loggers.__name__): + llm.llm_engine.do_log_stats() + + assert len(caplog_vllm.records) == 1 + msg = caplog_vllm.records[0].getMessage() + + assert "MM cache hit rate" in msg + match = re.search(r"MM cache hit rate: ([0-9.]+)%", msg) + assert match is not None + return float(match.group(1)) + + @pytest.mark.parametrize("image_urls", [TEST_IMAGE_ASSETS[:2]], indirect=True) @pytest.mark.parametrize("mm_processor_cache_type", ["lru", "shm"]) def test_mm_cache_stats( num_gpus_available, image_urls, mm_processor_cache_type, + caplog_vllm, ): llm = LLM( model="llava-hf/llava-1.5-7b-hf", @@ -56,12 +75,15 @@ def test_mm_cache_stats( llm.chat(_make_messages(image_urls[0])) assert _get_mm_cache_stats(llm.get_metrics()) == (1, 0) + assert _get_mm_cache_log(llm, caplog_vllm) == pytest.approx(0.0) llm.chat(_make_messages(image_urls[1])) assert _get_mm_cache_stats(llm.get_metrics()) == (2, 0) + assert _get_mm_cache_log(llm, caplog_vllm) == pytest.approx(0.0) llm.chat(_make_messages(image_urls[0])) assert _get_mm_cache_stats(llm.get_metrics()) == (3, 1) + assert _get_mm_cache_log(llm, caplog_vllm) == pytest.approx(33.3) # NOTE: This only resets hit rate stats in CachingMetrics # The raw queries and hits counts remain unaffected @@ -69,6 +91,8 @@ def test_mm_cache_stats( llm.chat(_make_messages(image_urls[0])) assert _get_mm_cache_stats(llm.get_metrics()) == (4, 1) + assert _get_mm_cache_log(llm, caplog_vllm) == pytest.approx(0.0) llm.chat(_make_messages(image_urls[1])) assert _get_mm_cache_stats(llm.get_metrics()) == (5, 1) + assert _get_mm_cache_log(llm, caplog_vllm) == pytest.approx(0.0) diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index 3db9a428e93a6..32d2ed2961dee 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -60,7 +60,6 @@ class LoggingStatLogger(StatLoggerBase): self._reset(time.monotonic()) self.last_scheduler_stats = SchedulerStats() - self.last_mm_cache_stats: Optional[MultiModalCacheStats] = None # Caching metrics. This cannot be reset. # TODO: Make the interval configurable. @@ -115,8 +114,6 @@ class LoggingStatLogger(StatLoggerBase): if mm_cache_stats: self.mm_caching_metrics.observe(mm_cache_stats) - self.last_mm_cache_stats = mm_cache_stats - def log(self): now = time.monotonic() prompt_throughput = self._get_throughput(self.num_prompt_tokens, now) @@ -157,7 +154,7 @@ class LoggingStatLogger(StatLoggerBase): scheduler_stats.kv_cache_usage * 100, self.prefix_caching_metrics.hit_rate * 100, ] - if self.last_mm_cache_stats: + if not self.mm_caching_metrics.empty: log_parts.append("MM cache hit rate: %.1f%%") log_args.append(self.mm_caching_metrics.hit_rate * 100) diff --git a/vllm/v1/metrics/stats.py b/vllm/v1/metrics/stats.py index f0922288db325..8d21efca87f44 100644 --- a/vllm/v1/metrics/stats.py +++ b/vllm/v1/metrics/stats.py @@ -96,6 +96,11 @@ class CachingMetrics: self.aggregated_query_hit = 0 self.query_queue.clear() + @property + def empty(self) -> bool: + """Return true if no requests have been observed.""" + return self.aggregated_requests == 0 + @property def hit_rate(self) -> float: """Calculate the hit rate for the past N requests."""