mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-22 20:45:01 +08:00
[Frontend] Publish Prometheus metrics in run_batch API (#7641)
This commit is contained in:
parent
6885fde317
commit
8da48e4d95
@ -1,3 +1,7 @@
|
|||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
|
|
||||||
import openai
|
import openai
|
||||||
@ -177,3 +181,48 @@ async def test_metrics_exist(client: openai.AsyncOpenAI):
|
|||||||
|
|
||||||
for metric in EXPECTED_METRICS:
|
for metric in EXPECTED_METRICS:
|
||||||
assert metric in response.text
|
assert metric in response.text
|
||||||
|
|
||||||
|
|
||||||
|
def test_metrics_exist_run_batch():
    """End-to-end check that run_batch serves Prometheus metrics.

    Spawns ``vllm.entrypoints.openai.run_batch`` as a subprocess with
    ``--enable-metrics``, waits for the metrics HTTP server to come up,
    and asserts that ``GET /metrics`` returns 200 OK.
    """
    # One embedding request; the model here must match the --model flag
    # passed to the subprocess below.
    input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}}"""  # noqa: E501

    base_url = "0.0.0.0"
    port = "8001"
    server_url = f"http://{base_url}:{port}"

    with tempfile.NamedTemporaryFile(
            "w") as input_file, tempfile.NamedTemporaryFile(
                "r") as output_file:
        input_file.write(input_batch)
        # Flush so the subprocess sees the request line immediately.
        input_file.flush()

        proc = subprocess.Popen([
            sys.executable,
            "-m",
            "vllm.entrypoints.openai.run_batch",
            "-i",
            input_file.name,
            "-o",
            output_file.name,
            "--model",
            "intfloat/e5-mistral-7b-instruct",
            "--enable-metrics",
            "--url",
            base_url,
            "--port",
            port,
        ])

        def is_server_up(url):
            # Best-effort probe: the metrics server may not be listening
            # yet, in which case the connection is simply refused.
            try:
                response = requests.get(url)
                return response.status_code == 200
            except requests.ConnectionError:
                return False

        try:
            # Poll until the metrics server answers.  Unlike a bare
            # sleep-loop, bail out early if the subprocess already died
            # and bound the total wait so a wedged server cannot hang
            # the test suite forever.
            deadline = time.monotonic() + 600
            while not is_server_up(server_url):
                assert proc.poll() is None, (
                    "run_batch subprocess exited before the metrics "
                    "server came up")
                assert time.monotonic() < deadline, (
                    "timed out waiting for the metrics server")
                time.sleep(1)

            response = requests.get(server_url + "/metrics")
            assert response.status_code == HTTPStatus.OK

            # Let the batch job run to completion.
            proc.wait()
        finally:
            # Never leak the subprocess past the test, even on failure.
            if proc.poll() is None:
                proc.terminate()
                proc.wait()
|
|||||||
@ -3,6 +3,7 @@ from io import StringIO
|
|||||||
from typing import Awaitable, Callable, List
|
from typing import Awaitable, Callable, List
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
from prometheus_client import start_http_server
|
||||||
|
|
||||||
from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
|
from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
|
||||||
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
||||||
@ -56,6 +57,24 @@ def parse_args():
|
|||||||
'ID numbers being printed in log.'
|
'ID numbers being printed in log.'
|
||||||
'\n\nDefault: Unlimited')
|
'\n\nDefault: Unlimited')
|
||||||
|
|
||||||
|
parser.add_argument("--enable-metrics",
|
||||||
|
action="store_true",
|
||||||
|
help="Enable Prometheus metrics")
|
||||||
|
parser.add_argument(
|
||||||
|
"--url",
|
||||||
|
type=str,
|
||||||
|
default="0.0.0.0",
|
||||||
|
help="URL to the Prometheus metrics server "
|
||||||
|
"(only needed if enable-metrics is set).",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--port",
|
||||||
|
type=int,
|
||||||
|
default=8000,
|
||||||
|
help="Port number for the Prometheus metrics server "
|
||||||
|
"(only needed if enable-metrics is set).",
|
||||||
|
)
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
@ -184,4 +203,12 @@ if __name__ == "__main__":
|
|||||||
logger.info("vLLM batch processing API version %s", VLLM_VERSION)
|
logger.info("vLLM batch processing API version %s", VLLM_VERSION)
|
||||||
logger.info("args: %s", args)
|
logger.info("args: %s", args)
|
||||||
|
|
||||||
|
# Start the Prometheus metrics server. LLMEngine uses the Prometheus client
|
||||||
|
# to publish metrics at the /metrics endpoint.
|
||||||
|
if args.enable_metrics:
|
||||||
|
logger.info("Prometheus metrics enabled")
|
||||||
|
start_http_server(port=args.port, addr=args.url)
|
||||||
|
else:
|
||||||
|
logger.info("Prometheus metrics disabled")
|
||||||
|
|
||||||
asyncio.run(main(args))
|
asyncio.run(main(args))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user