[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797)

This commit is contained in:
Alexei-V-Ivanov-AMD 2024-05-16 22:58:25 -05:00 committed by GitHub
parent 0150a10630
commit 26148120b3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 28 additions and 20 deletions

View File

@ -1,4 +1,4 @@
# This script builds the ROCm docker image and runs tests inside it. # This script runs tests inside the corresponding ROCm docker container.
set -ex set -ex
# Print ROCm version # Print ROCm version
@ -19,15 +19,16 @@ done
echo "--- Building container" echo "--- Building container"
sha=$(git rev-parse --short HEAD) sha=$(git rev-parse --short HEAD)
container_name=rocm_${sha} image_name=rocm_${sha}
container_name=rocm_${sha}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)
docker build \ docker build \
-t ${container_name} \ -t ${image_name} \
-f Dockerfile.rocm \ -f Dockerfile.rocm \
--progress plain \ --progress plain \
. .
remove_docker_container() { remove_docker_container() {
docker rm -f ${container_name} || docker image rm -f ${container_name} || true docker rm -f ${container_name} || docker image rm -f ${image_name} || true
} }
trap remove_docker_container EXIT trap remove_docker_container EXIT
@ -39,6 +40,6 @@ docker run \
--rm \ --rm \
-e HF_TOKEN \ -e HF_TOKEN \
--name ${container_name} \ --name ${container_name} \
${container_name} \ ${image_name} \
/bin/bash -c "${@}" /bin/bash -c "${@}"

View File

@ -5,13 +5,16 @@
steps: steps:
- label: Regression Test - label: Regression Test
mirror_hardwares: [amd]
command: pytest -v -s test_regression.py command: pytest -v -s test_regression.py
working_dir: "/vllm-workspace/tests" # optional working_dir: "/vllm-workspace/tests" # optional
- label: AsyncEngine Test - label: AsyncEngine Test
#mirror_hardwares: [amd]
command: pytest -v -s async_engine command: pytest -v -s async_engine
- label: Basic Correctness Test - label: Basic Correctness Test
mirror_hardwares: [amd]
commands: commands:
- VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py
- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py
@ -24,14 +27,15 @@ steps:
command: pytest -v -s core command: pytest -v -s core
- label: Distributed Comm Ops Test - label: Distributed Comm Ops Test
#mirror_hardwares: [amd]
command: pytest -v -s distributed/test_comm_ops.py command: pytest -v -s distributed/test_comm_ops.py
working_dir: "/vllm-workspace/tests" working_dir: "/vllm-workspace/tests"
num_gpus: 2 num_gpus: 2
- label: Distributed Tests - label: Distributed Tests
mirror_hardwares: [amd]
working_dir: "/vllm-workspace/tests" working_dir: "/vllm-workspace/tests"
num_gpus: 2 num_gpus: 2
mirror_hardwares: [amd]
commands: commands:
- pytest -v -s distributed/test_pynccl_library.py - pytest -v -s distributed/test_pynccl_library.py
- TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
@ -45,16 +49,18 @@ steps:
- pytest -v -s spec_decode/e2e/test_integration_dist.py - pytest -v -s spec_decode/e2e/test_integration_dist.py
- label: Distributed Tests (Multiple Groups) - label: Distributed Tests (Multiple Groups)
#mirror_hardwares: [amd]
working_dir: "/vllm-workspace/tests" working_dir: "/vllm-workspace/tests"
num_gpus: 4 num_gpus: 4
commands: commands:
- pytest -v -s distributed/test_pynccl.py - pytest -v -s distributed/test_pynccl.py
- label: Engine Test - label: Engine Test
#mirror_hardwares: [amd] mirror_hardwares: [amd]
command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
- label: Entrypoints Test - label: Entrypoints Test
#mirror_hardwares: [amd]
commands: commands:
# these tests have to be separated, because each one will allocate all possible GPU memory # these tests have to be separated, because each one will allocate all possible GPU memory
- pytest -v -s entrypoints --ignore=entrypoints/test_server_oot_registration.py - pytest -v -s entrypoints --ignore=entrypoints/test_server_oot_registration.py
@ -74,6 +80,7 @@ steps:
- python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
- label: Kernels Test %N - label: Kernels Test %N
#mirror_hardwares: [amd]
command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
parallelism: 4 parallelism: 4
@ -84,7 +91,7 @@ steps:
- pytest -v -s models --ignore=models/test_llava.py - pytest -v -s models --ignore=models/test_llava.py
- label: Llava Test - label: Llava Test
#mirror_hardwares: [amd] mirror_hardwares: [amd]
commands: commands:
- bash ../.buildkite/download-images.sh - bash ../.buildkite/download-images.sh
- pytest -v -s models/test_llava.py - pytest -v -s models/test_llava.py
@ -95,6 +102,7 @@ steps:
- pytest -v -s prefix_caching - pytest -v -s prefix_caching
- label: Samplers Test - label: Samplers Test
#mirror_hardwares: [amd]
command: pytest -v -s samplers command: pytest -v -s samplers
- label: LogitsProcessor Test - label: LogitsProcessor Test
@ -110,16 +118,20 @@ steps:
command: pytest -v -s spec_decode command: pytest -v -s spec_decode
- label: LoRA Test %N - label: LoRA Test %N
#mirror_hardwares: [amd]
command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
parallelism: 4 parallelism: 4
- label: Tensorizer Test - label: Tensorizer Test
#mirror_hardwares: [amd]
command: apt-get install curl libsodium23 && pytest -v -s tensorizer_loader command: apt-get install curl libsodium23 && pytest -v -s tensorizer_loader
- label: Metrics Test - label: Metrics Test
mirror_hardwares: [amd]
command: pytest -v -s metrics command: pytest -v -s metrics
- label: Quantization Test - label: Quantization Test
#mirror_hardwares: [amd]
command: pytest -v -s quantization command: pytest -v -s quantization
- label: Benchmarks - label: Benchmarks

View File

@ -3,9 +3,8 @@
{% set default_working_dir = "/vllm-workspace/tests" %} {% set default_working_dir = "/vllm-workspace/tests" %}
steps: steps:
- label: ":docker: build image" - label: ":docker: build image"
commands: commands:
- "docker build --build-arg max_jobs=16 --tag {{ docker_image }} --target test --progress plain ." - "docker build --build-arg max_jobs=16 --tag {{ docker_image }} --target test --progress plain ."
- "docker push {{ docker_image }}" - "docker push {{ docker_image }}"
env: env:

View File

@ -32,6 +32,7 @@ def test_stop_reason(vllm_model, example_prompts):
# test stop token # test stop token
outputs = llm.generate(example_prompts, outputs = llm.generate(example_prompts,
sampling_params=SamplingParams( sampling_params=SamplingParams(
ignore_eos=True,
seed=SEED, seed=SEED,
max_tokens=MAX_TOKENS, max_tokens=MAX_TOKENS,
stop_token_ids=[stop_token_id])) stop_token_ids=[stop_token_id]))
@ -43,7 +44,10 @@ def test_stop_reason(vllm_model, example_prompts):
# test stop string # test stop string
outputs = llm.generate(example_prompts, outputs = llm.generate(example_prompts,
sampling_params=SamplingParams( sampling_params=SamplingParams(
seed=SEED, max_tokens=MAX_TOKENS, stop=".")) ignore_eos=True,
seed=SEED,
max_tokens=MAX_TOKENS,
stop="."))
for output in outputs: for output in outputs:
output = output.outputs[0] output = output.outputs[0]
assert output.finish_reason == "stop" assert output.finish_reason == "stop"

View File

@ -1060,7 +1060,7 @@ _STR_DTYPE_TO_TORCH_DTYPE = {
"bfloat16": torch.bfloat16, "bfloat16": torch.bfloat16,
} }
_ROCM_NOT_SUPPORTED_DTYPE = ["float", "float32"] _ROCM_NOT_SUPPORTED_DTYPE: List[str] = [] #
def _get_and_verify_dtype( def _get_and_verify_dtype(
@ -1092,14 +1092,6 @@ def _get_and_verify_dtype(
else: else:
raise ValueError(f"Unknown dtype: {dtype}") raise ValueError(f"Unknown dtype: {dtype}")
if is_hip() and torch_dtype == torch.float32:
rocm_supported_dtypes = [
k for k, v in _STR_DTYPE_TO_TORCH_DTYPE.items()
if (k not in _ROCM_NOT_SUPPORTED_DTYPE)
]
raise ValueError(f"dtype '{dtype}' is not supported in ROCm. "
f"Supported dtypes are {rocm_supported_dtypes}")
# Verify the dtype. # Verify the dtype.
if torch_dtype != config_dtype: if torch_dtype != config_dtype:
if torch_dtype == torch.float32: if torch_dtype == torch.float32: