# Source: vllm-project/vllm (mirrored from https://git.datalinker.icu/vllm-project/vllm.git)
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import subprocess
import pytest
from ..utils import RemoteOpenAIServer
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
@pytest.fixture(scope="module")
def server():
    """Start one shared OpenAI-compatible vLLM server for this module.

    Eager mode and a dummy load format keep startup cheap; the server is
    torn down automatically when the module's tests finish.
    """
    cli_args = [
        "--max-model-len",
        "1024",
        "--enforce-eager",
        "--load-format",
        "dummy",
    ]
    with RemoteOpenAIServer(MODEL_NAME, cli_args) as remote_server:
        yield remote_server
@pytest.mark.benchmark
def test_bench_serve(server):
    """Smoke-test `vllm bench serve` against the module's shared server.

    Uses a tiny random dataset (5 prompts, short lengths) so the run is
    fast; only the exit code is checked.
    """
    bench_cmd = ["vllm", "bench", "serve"]
    bench_cmd += ["--model", MODEL_NAME]
    bench_cmd += ["--host", server.host]
    bench_cmd += ["--port", str(server.port)]
    bench_cmd += ["--dataset-name", "random"]
    bench_cmd += ["--random-input-len", "32"]
    bench_cmd += ["--random-output-len", "4"]
    bench_cmd += ["--num-prompts", "5"]

    proc = subprocess.run(bench_cmd, capture_output=True, text=True)
    # Echo the benchmark output so it shows up in the pytest log on failure.
    print(proc.stdout)
    print(proc.stderr)

    assert proc.returncode == 0, f"Benchmark failed: {proc.stderr}"
@pytest.mark.benchmark
def test_bench_serve_chat(server):
    """Smoke-test `vllm bench serve` against the chat completions endpoint.

    Identical to the plain serve benchmark but routed through the
    openai-chat backend at /v1/chat/completions; only the exit code is
    checked.
    """
    bench_cmd = ["vllm", "bench", "serve"]
    bench_cmd += ["--model", MODEL_NAME]
    bench_cmd += ["--host", server.host]
    bench_cmd += ["--port", str(server.port)]
    bench_cmd += ["--dataset-name", "random"]
    bench_cmd += ["--random-input-len", "32"]
    bench_cmd += ["--random-output-len", "4"]
    bench_cmd += ["--num-prompts", "5"]
    bench_cmd += ["--endpoint", "/v1/chat/completions"]
    bench_cmd += ["--backend", "openai-chat"]

    proc = subprocess.run(bench_cmd, capture_output=True, text=True)
    # Echo the benchmark output so it shows up in the pytest log on failure.
    print(proc.stdout)
    print(proc.stderr)

    assert proc.returncode == 0, f"Benchmark failed: {proc.stderr}"