From d0215a58e78572d91dadafe9d832a2db89b09a13 Mon Sep 17 00:00:00 2001
From: Iskren Ivov Chernev
Date: Fri, 5 Jan 2024 15:24:42 +0200
Subject: [PATCH] Ensure metrics are logged regardless of requests (#2347)

---
 vllm/engine/async_llm_engine.py       |  6 ++++++
 vllm/engine/llm_engine.py             |  3 +++
 vllm/entrypoints/openai/api_server.py | 20 +++++++++++++++++++-
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index fbe4a4e5d459..92f23ec29bfd 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -506,3 +506,9 @@ class AsyncLLMEngine:
             max_log_len=engine_args.max_log_len,
             start_engine_loop=start_engine_loop)
         return engine
+
+    async def do_log_stats(self) -> None:
+        if self.engine_use_ray:
+            await self.engine.do_log_stats.remote()
+        else:
+            self.engine.do_log_stats()
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 0e36a50a5709..bf5a11f39e82 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -641,6 +641,9 @@ class LLMEngine:
 
         return self._process_model_outputs(output, scheduler_outputs)
 
+    def do_log_stats(self) -> None:
+        self._log_system_stats(False, 0)
+
     def _log_system_stats(
         self,
         prompt_run: bool,
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index be5f4190e633..30c55f4c01c5 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -6,6 +6,7 @@ import asyncio
 import codecs
 import json
 import time
+from contextlib import asynccontextmanager
 from http import HTTPStatus
 from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
 
@@ -38,11 +39,28 @@ TIMEOUT_KEEP_ALIVE = 5  # seconds
 logger = init_logger(__name__)
 
 served_model = None
-app = fastapi.FastAPI()
+engine_args = None
 engine = None
 response_role = None
 
 
+@asynccontextmanager
+async def lifespan(app: fastapi.FastAPI):
+
+    async def _force_log():
+        while True:
+            await asyncio.sleep(10)
+            await engine.do_log_stats()
+
+    if not engine_args.disable_log_stats:
+        asyncio.create_task(_force_log())
+
+    yield
+
+
+app = fastapi.FastAPI(lifespan=lifespan)
+
+
 def parse_args():
     parser = argparse.ArgumentParser(
         description="vLLM OpenAI-Compatible RESTful API server.")