mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797)
This commit is contained in:
parent 0150a10630
commit 26148120b3
@@ -1,4 +1,4 @@
-# This script build the ROCm docker image and runs test inside it.
+# This script runs test inside the corresponding ROCm docker container.
 set -ex

 # Print ROCm version
@@ -19,15 +19,16 @@ done

 echo "--- Building container"
 sha=$(git rev-parse --short HEAD)
-container_name=rocm_${sha}
+image_name=rocm_${sha}
+container_name=rocm_${sha}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)
 docker build \
-       -t ${container_name} \
+       -t ${image_name} \
        -f Dockerfile.rocm \
        --progress plain \
        .

 remove_docker_container() {
-   docker rm -f ${container_name} || docker image rm -f ${container_name} || true
+   docker rm -f ${container_name} || docker image rm -f ${image_name} || true
 }
 trap remove_docker_container EXIT

@@ -39,6 +40,6 @@ docker run \
        --rm \
        -e HF_TOKEN \
        --name ${container_name} \
-       ${container_name} \
+       ${image_name} \
        /bin/bash -c "${@}"

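The script changes above separate the image tag from the container name: the image keeps the deterministic per-commit rocm_${sha} tag, while each run now gets a container name with a random 10-character suffix, so concurrent jobs on the same host no longer collide, and the EXIT trap removes the run's container before falling back to removing the image. A minimal Python sketch of the same unique-naming idea (the make_container_name helper and the use of the secrets module are illustrative, not part of the script):

import secrets
import string


def make_container_name(sha: str, suffix_len: int = 10) -> str:
    """Mimic the script's rocm_${sha}_<random 10 chars> naming scheme."""
    alphabet = string.ascii_letters + string.digits
    suffix = "".join(secrets.choice(alphabet) for _ in range(suffix_len))
    return f"rocm_{sha}_{suffix}"


image_name = "rocm_abc1234"            # deterministic: one image tag per commit sha
container_name = make_container_name("abc1234")
print(image_name, container_name)      # the container name differs on every invocation
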
@@ -5,13 +5,16 @@

 steps:
 - label: Regression Test
+  mirror_hardwares: [amd]
   command: pytest -v -s test_regression.py
   working_dir: "/vllm-workspace/tests" # optional

 - label: AsyncEngine Test
+  #mirror_hardwares: [amd]
   command: pytest -v -s async_engine

 - label: Basic Correctness Test
+  mirror_hardwares: [amd]
   commands:
   - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py
   - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py
@@ -24,14 +27,15 @@ steps:
   command: pytest -v -s core

 - label: Distributed Comm Ops Test
+  #mirror_hardwares: [amd]
   command: pytest -v -s distributed/test_comm_ops.py
   working_dir: "/vllm-workspace/tests"
   num_gpus: 2

 - label: Distributed Tests
+  mirror_hardwares: [amd]
   working_dir: "/vllm-workspace/tests"
   num_gpus: 2
-  mirror_hardwares: [amd]
   commands:
   - pytest -v -s distributed/test_pynccl_library.py
   - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
@@ -45,16 +49,18 @@ steps:
   - pytest -v -s spec_decode/e2e/test_integration_dist.py

 - label: Distributed Tests (Multiple Groups)
+  #mirror_hardwares: [amd]
   working_dir: "/vllm-workspace/tests"
   num_gpus: 4
   commands:
   - pytest -v -s distributed/test_pynccl.py

 - label: Engine Test
-  #mirror_hardwares: [amd]
+  mirror_hardwares: [amd]
   command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py

 - label: Entrypoints Test
+  #mirror_hardwares: [amd]
   commands:
   # these tests have to be separated, because each one will allocate all posible GPU memory
   - pytest -v -s entrypoints --ignore=entrypoints/test_server_oot_registration.py
@@ -74,6 +80,7 @@ steps:
   - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors

 - label: Kernels Test %N
+  #mirror_hardwares: [amd]
   command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
   parallelism: 4

@@ -84,7 +91,7 @@ steps:
   - pytest -v -s models --ignore=models/test_llava.py

 - label: Llava Test
-  #mirror_hardwares: [amd]
+  mirror_hardwares: [amd]
   commands:
   - bash ../.buildkite/download-images.sh
   - pytest -v -s models/test_llava.py
@@ -95,6 +102,7 @@ steps:
   - pytest -v -s prefix_caching

 - label: Samplers Test
+  #mirror_hardwares: [amd]
   command: pytest -v -s samplers

 - label: LogitsProcessor Test
@@ -110,16 +118,20 @@ steps:
   command: pytest -v -s spec_decode

 - label: LoRA Test %N
+  #mirror_hardwares: [amd]
   command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
   parallelism: 4

 - label: Tensorizer Test
+  #mirror_hardwares: [amd]
   command: apt-get install curl libsodium23 && pytest -v -s tensorizer_loader

 - label: Metrics Test
+  mirror_hardwares: [amd]
   command: pytest -v -s metrics

 - label: Quantization Test
+  #mirror_hardwares: [amd]
   command: pytest -v -s quantization

 - label: Benchmarks

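In the pipeline above, a step tagged mirror_hardwares: [amd] is also scheduled on the AMD (ROCm) queue, while the commented-out #mirror_hardwares: [amd] entries mark suites that are not yet enabled there; this commit turns the tag on for the Regression, Basic Correctness, Distributed, Engine, Llava and Metrics suites. The mirroring itself happens in the Buildkite template, which is not shown in full here. As a rough illustration only, the sketch below lists which steps a pipeline file like this would mirror to AMD (PyYAML and the file path are assumptions of the example):

import yaml  # PyYAML, assumed to be installed

# Load the Buildkite pipeline definition and report the AMD-mirrored steps.
with open(".buildkite/test-pipeline.yaml") as f:  # path assumed for the example
    pipeline = yaml.safe_load(f)

amd_steps = [
    step["label"]
    for step in pipeline.get("steps", [])
    if "amd" in step.get("mirror_hardwares", [])
]
print("Steps mirrored to AMD:", amd_steps)
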
@@ -3,9 +3,8 @@
 {% set default_working_dir = "/vllm-workspace/tests" %}

 steps:
-
   - label: ":docker: build image"
     commands:
       - "docker build --build-arg max_jobs=16 --tag {{ docker_image }} --target test --progress plain ."
       - "docker push {{ docker_image }}"
     env:

@@ -32,6 +32,7 @@ def test_stop_reason(vllm_model, example_prompts):
     # test stop token
     outputs = llm.generate(example_prompts,
                            sampling_params=SamplingParams(
+                               ignore_eos=True,
                                seed=SEED,
                                max_tokens=MAX_TOKENS,
                                stop_token_ids=[stop_token_id]))
@@ -43,7 +44,10 @@ def test_stop_reason(vllm_model, example_prompts):
     # test stop string
     outputs = llm.generate(example_prompts,
                            sampling_params=SamplingParams(
-                               seed=SEED, max_tokens=MAX_TOKENS, stop="."))
+                               ignore_eos=True,
+                               seed=SEED,
+                               max_tokens=MAX_TOKENS,
+                               stop="."))
     for output in outputs:
         output = output.outputs[0]
         assert output.finish_reason == "stop"

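Both generate calls now pass ignore_eos=True, so decoding cannot end early at the model's EOS token and the assertions really exercise the explicit stop_token_ids / stop conditions; the second call is also reflowed to one keyword argument per line. A minimal sketch of the same pattern with vLLM's public API (the model name, constants and stop token id are placeholders, not the test's actual fixtures):

from vllm import LLM, SamplingParams

# Placeholder values standing in for the test's fixtures and constants.
llm = LLM(model="facebook/opt-125m")
SEED, MAX_TOKENS = 42, 128
stop_token_id = 13  # hypothetical token id to stop on

params = SamplingParams(
    ignore_eos=True,          # keep sampling past EOS so only the stop condition ends it
    seed=SEED,
    max_tokens=MAX_TOKENS,
    stop_token_ids=[stop_token_id],
)
outputs = llm.generate(["Hello, my name is"], sampling_params=params)
print(outputs[0].outputs[0].finish_reason)  # "stop" when the stop token was generated
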
@@ -1060,7 +1060,7 @@ _STR_DTYPE_TO_TORCH_DTYPE = {
     "bfloat16": torch.bfloat16,
 }

-_ROCM_NOT_SUPPORTED_DTYPE = ["float", "float32"]
+_ROCM_NOT_SUPPORTED_DTYPE: List[str] = []  #


 def _get_and_verify_dtype(
@@ -1092,14 +1092,6 @@ def _get_and_verify_dtype(
     else:
         raise ValueError(f"Unknown dtype: {dtype}")

-    if is_hip() and torch_dtype == torch.float32:
-        rocm_supported_dtypes = [
-            k for k, v in _STR_DTYPE_TO_TORCH_DTYPE.items()
-            if (k not in _ROCM_NOT_SUPPORTED_DTYPE)
-        ]
-        raise ValueError(f"dtype '{dtype}' is not supported in ROCm. "
-                         f"Supported dtypes are {rocm_supported_dtypes}")
-
     # Verify the dtype.
     if torch_dtype != config_dtype:
         if torch_dtype == torch.float32:

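With _ROCM_NOT_SUPPORTED_DTYPE emptied and the is_hip()/float32 guard deleted, ROCm builds no longer reject float32 models up front; they go through the same generic dtype verification as other platforms. A condensed sketch of the resolution step that remains (simplified for illustration, not the full _get_and_verify_dtype implementation):

from typing import List, Union

import torch

_STR_DTYPE_TO_TORCH_DTYPE = {
    "half": torch.float16,
    "float16": torch.float16,
    "float": torch.float32,
    "float32": torch.float32,
    "bfloat16": torch.bfloat16,
}

# After this commit the ROCm blocklist is empty: no dtype is rejected outright.
_ROCM_NOT_SUPPORTED_DTYPE: List[str] = []


def resolve_dtype(dtype: Union[str, torch.dtype]) -> torch.dtype:
    """Map a user-supplied dtype string (or torch.dtype) to a torch.dtype."""
    if isinstance(dtype, str):
        key = dtype.lower()
        if key not in _STR_DTYPE_TO_TORCH_DTYPE:
            raise ValueError(f"Unknown dtype: {dtype}")
        return _STR_DTYPE_TO_TORCH_DTYPE[key]
    if isinstance(dtype, torch.dtype):
        return dtype
    raise ValueError(f"Unknown dtype: {dtype}")


print(resolve_dtype("float32"))  # now accepted on ROCm builds as well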