Nicolò Lucchesi b2fac67130
[P/D] Heterogeneous TP (#18833)
Signed-off-by: nicklucche <nlucches@redhat.com>
2025-06-04 23:25:34 +00:00

63 lines
1.9 KiB
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
import lm_eval
import openai
# Base URL handed to the OpenAI client and (with "/completions" appended)
# to lm_eval.
BASE_URL = "http://localhost:8192/v1"
# Forwarded to lm_eval as the `num_concurrent` model argument.
NUM_CONCURRENT = 100
# lm_eval task to run, and the metric key read back from its results.
TASK = "gsm8k"
FILTER = "exact_match,strict-match"
# Allowed deviation of the measured score from the expected one.
# NOTE: despite the name, test_accuracy applies this as an absolute
# +/- band around the measured value, not as a relative tolerance.
RTOL = 0.03

# Model-specific expected values
EXPECTED_VALUES = {
    "Qwen/Qwen3-0.6B": 0.41,
    "deepseek-ai/deepseek-vl2-small": 0.59,
}

# Keep this a plain string: a trailing comma after the literal would turn
# the constant into a 1-tuple and the prompt would be sent as a list.
SIMPLE_PROMPT = "The best part about working on vLLM is that I got to meet so many people across various different organizations like UCB, Google, and Meta which means"  # noqa: E501

# Get model name from environment variable
MODEL_NAME = os.environ.get("TEST_MODEL", "Qwen/Qwen3-0.6B")
def run_simple_prompt():
    """Request one completion for SIMPLE_PROMPT and print the raw response.

    Builds an OpenAI client pointed at BASE_URL (with a placeholder API
    key), asks MODEL_NAME to complete SIMPLE_PROMPT, and prints the
    returned object between two separator lines.
    """
    separator = "-" * 50
    api = openai.OpenAI(base_url=BASE_URL, api_key="EMPTY")
    response = api.completions.create(prompt=SIMPLE_PROMPT, model=MODEL_NAME)

    # Emit header, payload and footer in the same order as before.
    for line in (
            separator,
            f"Completion results for {MODEL_NAME}:",
            response,
            separator,
    ):
        print(line)
def test_accuracy():
    """Run the end to end accuracy test.

    Sends a single prompt via run_simple_prompt(), then evaluates TASK
    through lm_eval's "local-completions" model against BASE_URL and
    compares the FILTER metric with the value registered for MODEL_NAME
    in EXPECTED_VALUES. Models without a registered value only get their
    measured score printed; no assertion is made for them.
    """
    run_simple_prompt()

    # lm_eval takes its model arguments as one comma-separated string.
    eval_args = ",".join([
        f"model={MODEL_NAME}",
        f"base_url={BASE_URL}/completions",
        f"num_concurrent={NUM_CONCURRENT}",
        "tokenized_requests=False",
    ])
    eval_output = lm_eval.simple_evaluate(
        model="local-completions",
        model_args=eval_args,
        tasks=TASK,
    )
    measured_value = eval_output["results"][TASK][FILTER]

    expected_value = EXPECTED_VALUES.get(MODEL_NAME)
    if expected_value is not None:
        # Expected score must lie strictly inside measured +/- RTOL.
        lower_bound = measured_value - RTOL
        upper_bound = measured_value + RTOL
        assert lower_bound < expected_value < upper_bound, (
            f"Expected: {expected_value} | Measured: {measured_value}")
        return

    print(f"Warning: No expected value found for {MODEL_NAME}. "
          "Skipping accuracy check.")
    print(f"Measured value: {measured_value}")