Ensure metrics are logged regardless of requests (#2347)

This commit is contained in:
Iskren Ivov Chernev 2024-01-05 15:24:42 +02:00 committed by GitHub
parent 937e7b7d7c
commit d0215a58e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 1 deletions

View File

@@ -506,3 +506,9 @@ class AsyncLLMEngine:
max_log_len=engine_args.max_log_len, max_log_len=engine_args.max_log_len,
start_engine_loop=start_engine_loop) start_engine_loop=start_engine_loop)
return engine return engine
async def do_log_stats(self) -> None:
    """Trigger one system-stats log line on the wrapped engine.

    Dispatches to the Ray actor when the engine runs remotely; otherwise
    invokes the local engine's logger directly.
    """
    if not self.engine_use_ray:
        self.engine.do_log_stats()
        return
    await self.engine.do_log_stats.remote()

View File

@@ -641,6 +641,9 @@ class LLMEngine:
return self._process_model_outputs(output, scheduler_outputs) return self._process_model_outputs(output, scheduler_outputs)
def do_log_stats(self) -> None:
    """Emit a system-stats log line outside the normal request path.

    Called externally (per this commit, from a periodic background task in
    the API server) so stats get logged even when no requests arrive.
    """
    # False => not a prompt run; 0 is the second positional arg to
    # _log_system_stats — presumably a batched-token count, but its name
    # is not visible here; confirm against _log_system_stats's signature.
    self._log_system_stats(False, 0)
def _log_system_stats( def _log_system_stats(
self, self,
prompt_run: bool, prompt_run: bool,

View File

@@ -6,6 +6,7 @@ import asyncio
import codecs import codecs
import json import json
import time import time
from contextlib import asynccontextmanager
from http import HTTPStatus from http import HTTPStatus
from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
@@ -38,11 +39,28 @@ TIMEOUT_KEEP_ALIVE = 5  # seconds
logger = init_logger(__name__) logger = init_logger(__name__)
served_model = None served_model = None
app = fastapi.FastAPI() engine_args = None
engine = None engine = None
response_role = None response_role = None
@asynccontextmanager
async def lifespan(app: fastapi.FastAPI):
    """FastAPI lifespan: keep engine stats flowing even with zero traffic.

    Starts a background task that asks the engine to log its system stats
    every 10 seconds, unless stats logging is disabled via engine args.
    The task is cancelled when the server shuts down.
    """

    async def _force_log():
        while True:
            await asyncio.sleep(10)
            await engine.do_log_stats()

    task = None
    if not engine_args.disable_log_stats:
        # BUG FIX: keep a reference to the task. asyncio holds only weak
        # references to tasks, so a discarded create_task() result may be
        # garbage-collected and the logger silently stops.
        task = asyncio.create_task(_force_log())

    yield

    # Shut the background logger down cleanly on server shutdown.
    if task is not None:
        task.cancel()


app = fastapi.FastAPI(lifespan=lifespan)
def parse_args(): def parse_args():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="vLLM OpenAI-Compatible RESTful API server.") description="vLLM OpenAI-Compatible RESTful API server.")