[Bugfix] Fix entrypoints metrics tests (#18063)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-01-23 10:34:27 +08:00 · 2025-05-13 21:42:43 +08:00 · 2025-05-13 21:42:43 +08:00 · b922c2ebd2
commit b922c2ebd2
parent 00b14e0f16
2 changed files with 5 additions and 5 deletions
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -150,10 +150,6 @@ async def build_async_engine_client(

    async with build_async_engine_client_from_engine_args(
            engine_args, args.disable_frontend_multiprocessing) as engine:
-
-        # Don't keep the dummy data in memory
-        await engine.reset_mm_cache()
-
        yield engine


@@ -189,6 +185,10 @@ async def build_async_engine_client_from_engine_args(
                usage_context=usage_context,
                disable_log_requests=engine_args.disable_log_requests,
                disable_log_stats=engine_args.disable_log_stats)
+
+            # Don't keep the dummy data in memory
+            await async_llm.reset_mm_cache()
+
            yield async_llm
        finally:
            if async_llm:
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -289,7 +289,7 @@ class EngineCore:
    def reset_mm_cache(self):
        # NOTE: Since this is mainly for debugging, we don't attempt to
        # re-sync the internal caches (P0 processor, P0 mirror, P1 mirror)
-        if self.scheduler.get_num_unfinished_requests():
+        if self.scheduler.has_unfinished_requests():
            logger.warning("Resetting the multi-modal cache when requests are "
                           "in progress may lead to desynced internal caches.")