Ensure metrics are logged regardless of requests (#2347)

This commit is contained in:
Iskren Ivov Chernev 2024-01-05 15:24:42 +02:00 committed by GitHub
parent 937e7b7d7c
commit d0215a58e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 1 deletion

View File

@@ -506,3 +506,9 @@ class AsyncLLMEngine:
max_log_len=engine_args.max_log_len,
start_engine_loop=start_engine_loop)
return engine
async def do_log_stats(self) -> None:
    """Ask the wrapped engine to log its system stats.

    Dispatches to the Ray actor method when the engine runs remotely;
    otherwise calls the local engine directly.
    """
    if not self.engine_use_ray:
        self.engine.do_log_stats()
    else:
        await self.engine.do_log_stats.remote()

View File

@@ -641,6 +641,9 @@ class LLMEngine:
return self._process_model_outputs(output, scheduler_outputs)
def do_log_stats(self) -> None:
    """Log system stats on demand, outside the normal request-processing path.

    Lets a caller (e.g. a periodic background task) emit metrics even when
    no requests have been scheduled.
    """
    # prompt_run=False with 0 batched tokens — records a sample with no new
    # work attributed to it. Second argument's name isn't visible here;
    # presumably a batched-token count — TODO confirm in _log_system_stats.
    self._log_system_stats(False, 0)
def _log_system_stats(
self,
prompt_run: bool,

View File

@@ -6,6 +6,7 @@ import asyncio
import codecs
import json
import time
from contextlib import asynccontextmanager
from http import HTTPStatus
from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
@@ -38,11 +39,28 @@ TIMEOUT_KEEP_ALIVE = 5  # seconds
# Logger for this API-server module.
logger = init_logger(__name__)
# Module-level globals; presumably assigned during startup (after CLI
# parsing) before the server begins serving — TODO confirm against the
# rest of this file, which is not fully visible here.
served_model = None
app = fastapi.FastAPI()
engine_args = None
engine = None
response_role = None
@asynccontextmanager
async def lifespan(app: fastapi.FastAPI):
    """FastAPI lifespan handler: run a background task that periodically
    logs engine stats, so metrics are emitted even with no traffic.

    Started before the app serves requests (code before ``yield``) and
    torn down on shutdown (code after ``yield``).
    """

    async def _force_log():
        # Every 10s, ask the engine to log its stats.
        while True:
            await asyncio.sleep(10)
            await engine.do_log_stats()

    task = None
    if not engine_args.disable_log_stats:
        # Keep a strong reference: asyncio only holds a weak reference to
        # tasks, so a bare create_task() result can be garbage-collected
        # and silently stop logging.
        task = asyncio.create_task(_force_log())

    try:
        yield
    finally:
        # Stop the background logger on shutdown.
        if task is not None:
            task.cancel()
app = fastapi.FastAPI(lifespan=lifespan)
def parse_args():
parser = argparse.ArgumentParser(
description="vLLM OpenAI-Compatible RESTful API server.")