mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-28 05:07:04 +08:00
[CI] refine more logic when generating and using nightly wheels & indices, add cuda130 build for aarch64, specify correct manylinux version (#30341)
Signed-off-by: Shengqi Chen <harry-chen@outlook.com>
This commit is contained in:
parent
93db3256a4
commit
305b168a9f
@ -15,6 +15,21 @@ steps:
|
|||||||
env:
|
env:
|
||||||
DOCKER_BUILDKIT: "1"
|
DOCKER_BUILDKIT: "1"
|
||||||
|
|
||||||
|
- label: "Build arm64 wheel - CUDA 13.0"
|
||||||
|
depends_on: ~
|
||||||
|
id: build-wheel-arm64-cuda-13-0
|
||||||
|
agents:
|
||||||
|
queue: arm64_cpu_queue_postmerge
|
||||||
|
commands:
|
||||||
|
# #NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here:
|
||||||
|
# https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7
|
||||||
|
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
|
||||||
|
- "mkdir artifacts"
|
||||||
|
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
|
||||||
|
- "bash .buildkite/scripts/upload-wheels.sh manylinux_2_35"
|
||||||
|
env:
|
||||||
|
DOCKER_BUILDKIT: "1"
|
||||||
|
|
||||||
# aarch64 build
|
# aarch64 build
|
||||||
- label: "Build arm64 CPU wheel"
|
- label: "Build arm64 CPU wheel"
|
||||||
depends_on: ~
|
depends_on: ~
|
||||||
@ -25,7 +40,7 @@ steps:
|
|||||||
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_BUILD_ACL=ON --tag vllm-ci:build-image --target vllm-build --progress plain -f docker/Dockerfile.cpu ."
|
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_BUILD_ACL=ON --tag vllm-ci:build-image --target vllm-build --progress plain -f docker/Dockerfile.cpu ."
|
||||||
- "mkdir artifacts"
|
- "mkdir artifacts"
|
||||||
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
|
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
|
||||||
- "bash .buildkite/scripts/upload-wheels.sh"
|
- "bash .buildkite/scripts/upload-wheels.sh manylinux_2_35"
|
||||||
env:
|
env:
|
||||||
DOCKER_BUILDKIT: "1"
|
DOCKER_BUILDKIT: "1"
|
||||||
|
|
||||||
@ -39,7 +54,7 @@ steps:
|
|||||||
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
|
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
|
||||||
- "mkdir artifacts"
|
- "mkdir artifacts"
|
||||||
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
|
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
|
||||||
- "bash .buildkite/scripts/upload-wheels.sh"
|
- "bash .buildkite/scripts/upload-wheels.sh manylinux_2_31"
|
||||||
env:
|
env:
|
||||||
DOCKER_BUILDKIT: "1"
|
DOCKER_BUILDKIT: "1"
|
||||||
|
|
||||||
@ -52,7 +67,7 @@ steps:
|
|||||||
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
|
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
|
||||||
- "mkdir artifacts"
|
- "mkdir artifacts"
|
||||||
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
|
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
|
||||||
- "bash .buildkite/scripts/upload-wheels.sh"
|
- "bash .buildkite/scripts/upload-wheels.sh manylinux_2_35"
|
||||||
env:
|
env:
|
||||||
DOCKER_BUILDKIT: "1"
|
DOCKER_BUILDKIT: "1"
|
||||||
|
|
||||||
|
|||||||
@ -372,6 +372,17 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
print(f"Found {len(wheel_files)} wheel files for version {version}: {wheel_files}")
|
print(f"Found {len(wheel_files)} wheel files for version {version}: {wheel_files}")
|
||||||
|
|
||||||
|
# keep only "official" files for a non-nightly version (specified by CLI args)
|
||||||
|
PY_VERSION_RE = re.compile(r"^\d+\.\d+\.\d+([a-zA-Z0-9.+-]*)?$")
|
||||||
|
if PY_VERSION_RE.match(version):
|
||||||
|
# upload-wheels.sh ensures no "dev" is in args.version
|
||||||
|
wheel_files = list(
|
||||||
|
filter(lambda x: version in x and "dev" not in x, wheel_files)
|
||||||
|
)
|
||||||
|
print(f"Non-nightly version detected, wheel files used: {wheel_files}")
|
||||||
|
else:
|
||||||
|
print("Nightly version detected, keeping all wheel files.")
|
||||||
|
|
||||||
# Generate index and metadata, assuming wheels and indices are stored as:
|
# Generate index and metadata, assuming wheels and indices are stored as:
|
||||||
# s3://vllm-wheels/{version}/<wheel files>
|
# s3://vllm-wheels/{version}/<wheel files>
|
||||||
# s3://vllm-wheels/<anything>/<index files>
|
# s3://vllm-wheels/<anything>/<index files>
|
||||||
|
|||||||
@ -34,9 +34,10 @@ if [[ ${#wheel_files[@]} -ne 1 ]]; then
|
|||||||
fi
|
fi
|
||||||
wheel="${wheel_files[0]}"
|
wheel="${wheel_files[0]}"
|
||||||
|
|
||||||
# current build image uses ubuntu 20.04, which corresponds to manylinux_2_31
|
# default build image uses ubuntu 20.04, which corresponds to manylinux_2_31
|
||||||
|
# we also accept params as manylinux tag
|
||||||
# refer to https://github.com/mayeut/pep600_compliance?tab=readme-ov-file#acceptable-distros-to-build-wheels
|
# refer to https://github.com/mayeut/pep600_compliance?tab=readme-ov-file#acceptable-distros-to-build-wheels
|
||||||
manylinux_version="manylinux_2_31"
|
manylinux_version="${1:-manylinux_2_31}"
|
||||||
|
|
||||||
# Rename 'linux' to the appropriate manylinux version in the wheel filename
|
# Rename 'linux' to the appropriate manylinux version in the wheel filename
|
||||||
if [[ "$wheel" != *"linux"* ]]; then
|
if [[ "$wheel" != *"linux"* ]]; then
|
||||||
@ -96,8 +97,11 @@ if [[ "$BUILDKITE_BRANCH" == "main" && "$BUILDKITE_PULL_REQUEST" == "false" ]];
|
|||||||
aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/nightly/"
|
aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/nightly/"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# copy to /<pure_version>/ only if it does not have "dev" in the version
|
# re-generate and copy to /<pure_version>/ only if it does not have "dev" in the version
|
||||||
if [[ "$version" != *"dev"* ]]; then
|
if [[ "$version" != *"dev"* ]]; then
|
||||||
echo "Uploading indices to overwrite /$pure_version/"
|
echo "Re-generating indices for /$pure_version/"
|
||||||
|
# Drop the whole output directory so stale (nightly) index files cannot be
# uploaded alongside the re-generated ones. A glob placed inside double quotes
# ("$dir/*") is taken literally and would delete nothing, so remove the
# directory itself; the mkdir -p that follows recreates it empty.
rm -rf -- "$INDICES_OUTPUT_DIR"
|
||||||
|
mkdir -p "$INDICES_OUTPUT_DIR"
|
||||||
|
$PYTHON .buildkite/scripts/generate-nightly-index.py --version "$pure_version" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "version $pure_version" $alias_arg
|
||||||
aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/$pure_version/"
|
aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/$pure_version/"
|
||||||
fi
|
fi
|
||||||
|
|||||||
@ -3,12 +3,45 @@
|
|||||||
# for users who do not have any compilers installed on their system
|
# for users who do not have any compilers installed on their system
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
set -x
|
|
||||||
|
|
||||||
merge_base_commit=$(git merge-base HEAD origin/main)
|
merge_base_commit=$(git merge-base HEAD origin/main)
|
||||||
echo "Current merge base commit with main: $merge_base_commit"
|
echo "INFO: current merge base commit with main: $merge_base_commit"
|
||||||
git show --oneline -s $merge_base_commit
|
git show --oneline -s $merge_base_commit
|
||||||
|
|
||||||
|
# test whether the metadata.json URL is valid, retrying every 3 minutes, up to 5 attempts
|
||||||
|
# this avoids cumbersome error messages & manual retries in case the precompiled wheel
|
||||||
|
# for the given commit is still being built in the release pipeline
|
||||||
|
meta_json_url="https://wheels.vllm.ai/$merge_base_commit/vllm/metadata.json"
|
||||||
|
echo "INFO: will use metadata.json from $meta_json_url"
|
||||||
|
|
||||||
|
for i in {1..5}; do
|
||||||
|
echo "Checking metadata.json URL (attempt $i)..."
|
||||||
|
if curl --fail "$meta_json_url" > metadata.json; then
|
||||||
|
echo "INFO: metadata.json URL is valid."
|
||||||
|
# check whether it is valid json by python
|
||||||
|
if python3 -m json.tool metadata.json; then
|
||||||
|
echo "INFO: metadata.json is valid JSON. Proceeding with the test."
|
||||||
|
else
|
||||||
|
echo "CRITICAL: metadata.json exists but is not valid JSON, please do report in #sig-ci channel!"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
# failure handling
|
||||||
|
if [ $i -eq 5 ]; then
|
||||||
|
echo "ERROR: metadata.json URL is still not valid after 5 attempts."
|
||||||
|
echo "ERROR: Please check whether the precompiled wheel for commit $merge_base_commit exists."
|
||||||
|
echo " NOTE: If $merge_base_commit is a new commit on main, maybe try again after its release pipeline finishes."
|
||||||
|
echo " NOTE: If it fails, please report in #sig-ci channel."
|
||||||
|
exit 1
|
||||||
|
else
|
||||||
|
echo "WARNING: metadata.json URL is not valid. Retrying in 3 minutes..."
|
||||||
|
sleep 180
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
set -x
|
||||||
|
|
||||||
cd /vllm-workspace/
|
cd /vllm-workspace/
|
||||||
|
|
||||||
# uninstall vllm
|
# uninstall vllm
|
||||||
@ -29,6 +62,6 @@ python3 -c 'import vllm'
|
|||||||
|
|
||||||
# Check if the clangd log file was created
|
# Check if the clangd log file was created
|
||||||
if [ ! -f /tmp/changed.file ]; then
|
if [ ! -f /tmp/changed.file ]; then
|
||||||
echo "changed.file was not created, python only compilation failed"
|
echo "ERROR: changed.file was not created, python only compilation failed"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user