mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 01:45:01 +08:00
Ensure metrics are logged regardless of requests (#2347)
This commit is contained in:
parent
937e7b7d7c
commit
d0215a58e7
@ -506,3 +506,9 @@ class AsyncLLMEngine:
|
|||||||
max_log_len=engine_args.max_log_len,
|
max_log_len=engine_args.max_log_len,
|
||||||
start_engine_loop=start_engine_loop)
|
start_engine_loop=start_engine_loop)
|
||||||
return engine
|
return engine
|
||||||
|
|
||||||
|
async def do_log_stats(self) -> None:
    """Ask the wrapped engine to log its stats once.

    When ``self.engine_use_ray`` is falsy the engine's ``do_log_stats``
    is called directly; otherwise the call is dispatched through
    ``.remote()`` and awaited.
    """
    if not self.engine_use_ray:
        # Direct, synchronous call on the wrapped engine.
        self.engine.do_log_stats()
        return
    # Ray-backed engine: go through the remote handle and await the result.
    await self.engine.do_log_stats.remote()
|
||||||
|
|||||||
@ -641,6 +641,9 @@ class LLMEngine:
|
|||||||
|
|
||||||
return self._process_model_outputs(output, scheduler_outputs)
|
return self._process_model_outputs(output, scheduler_outputs)
|
||||||
|
|
||||||
|
def do_log_stats(self) -> None:
    """Log system stats on demand.

    Delegates to ``_log_system_stats`` with ``prompt_run=False`` and ``0``
    as the second positional argument.
    NOTE(review): the second parameter's name is outside this view;
    presumably a token count for the step -- confirm against the signature.
    """
    on_demand_args = (False, 0)
    self._log_system_stats(*on_demand_args)
|
||||||
|
|
||||||
def _log_system_stats(
|
def _log_system_stats(
|
||||||
self,
|
self,
|
||||||
prompt_run: bool,
|
prompt_run: bool,
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import asyncio
|
|||||||
import codecs
|
import codecs
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
|
from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
|
||||||
|
|
||||||
@ -38,11 +39,28 @@ TIMEOUT_KEEP_ALIVE = 5 # seconds
|
|||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
served_model = None
|
served_model = None
|
||||||
app = fastapi.FastAPI()
|
engine_args = None
|
||||||
engine = None
|
engine = None
|
||||||
response_role = None
|
response_role = None
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: fastapi.FastAPI):
    """Application lifespan hook that keeps stats logging alive when idle.

    On startup, unless ``engine_args.disable_log_stats`` is set, a
    background task is started that calls ``engine.do_log_stats()`` every
    10 seconds. On shutdown (after the ``yield``) that task is cancelled.

    ``engine`` and ``engine_args`` are module-level globals; they are
    presumably populated before the server starts -- verify against the
    startup code.
    """

    async def _force_log():
        # Periodically force a stats log entry, independent of requests.
        while True:
            await asyncio.sleep(10)
            await engine.do_log_stats()

    log_task = None
    if not engine_args.disable_log_stats:
        # Keep a strong reference: the event loop only holds weak
        # references to tasks, so a discarded task may be garbage-collected
        # before it finishes.
        log_task = asyncio.create_task(_force_log())

    try:
        yield
    finally:
        # Stop the background logger so it does not outlive the app.
        if log_task is not None and not log_task.done():
            log_task.cancel()
            try:
                await log_task
            except asyncio.CancelledError:
                # Expected: we cancelled the task ourselves.
                pass
|
||||||
|
|
||||||
|
|
||||||
|
app = fastapi.FastAPI(lifespan=lifespan)
|
||||||
|
|
||||||
|
|
||||||
def parse_args():
|
def parse_args():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="vLLM OpenAI-Compatible RESTful API server.")
|
description="vLLM OpenAI-Compatible RESTful API server.")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user