mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-05 04:47:02 +08:00
[CI] Basic Integration Test For TPU (#9968)
Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
This commit is contained in:
parent
603a661ae8
commit
1c45f4c385
@ -12,4 +12,4 @@ remove_docker_container
|
||||
# For HF_TOKEN.
|
||||
source /etc/environment
|
||||
# Run a simple end-to-end example.
|
||||
docker run --privileged --net host --shm-size=16G -it -e HF_TOKEN=$HF_TOKEN --name tpu-test vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"
|
||||
docker run --privileged --net host --shm-size=16G -it -e HF_TOKEN=$HF_TOKEN --name tpu-test vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && python3 -m pip install lm_eval[api]==0.4.4 && pytest -v -s /workspace/vllm/tests/entrypoints/openai/test_accuracy.py && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"
|
||||
|
||||
@ -10,6 +10,8 @@ AsyncLLMEngine are working correctly.
|
||||
import lm_eval
|
||||
import pytest
|
||||
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
|
||||
@ -18,12 +20,21 @@ TASK = "gsm8k"
|
||||
FILTER = "exact_match,strict-match"
|
||||
RTOL = 0.03
|
||||
EXPECTED_VALUE = 0.58
|
||||
DEFAULT_ARGS = ["--max-model-len", "4096", "--disable-log-requests"]
|
||||
DEFAULT_ARGS = ["--max-model-len", "2048", "--disable-log-requests"]
|
||||
MORE_ARGS_LIST = [
|
||||
[], # Default
|
||||
["--enable-chunked-prefill"], # Chunked
|
||||
["--num-scheduler-steps", "8"], # MS
|
||||
["--num-scheduler-steps", "8", "--multi-step-stream-outputs"] # MS+Stream
|
||||
]
|
||||
MAX_WAIT_SECONDS = None
|
||||
|
||||
if current_platform.is_tpu():
|
||||
MORE_ARGS_LIST = [
|
||||
[], # Default
|
||||
# ["--num-scheduler-steps", "8"], # Multi-step << currently fails
|
||||
]
|
||||
MAX_WAIT_SECONDS = 600
|
||||
|
||||
|
||||
@pytest.mark.parametrize("more_args", MORE_ARGS_LIST)
|
||||
@ -33,7 +44,9 @@ def test_lm_eval_accuracy(more_args):
|
||||
|
||||
print(f"Running with: {args}")
|
||||
|
||||
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
|
||||
with RemoteOpenAIServer(
|
||||
MODEL_NAME, args,
|
||||
max_wait_seconds=MAX_WAIT_SECONDS) as remote_server:
|
||||
url = f"{remote_server.url_for('v1')}/completions"
|
||||
|
||||
model_args = (
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user