diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index fbfc923998f89..151bb6abb0905 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -15,6 +15,21 @@ steps: env: DOCKER_BUILDKIT: "1" + - label: "Build arm64 wheel - CUDA 13.0" + depends_on: ~ + id: build-wheel-arm64-cuda-13-0 + agents: + queue: arm64_cpu_queue_postmerge + commands: + # #NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here: + # https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7 + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." + - "mkdir artifacts" + - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" + - "bash .buildkite/scripts/upload-wheels.sh manylinux_2_35" + env: + DOCKER_BUILDKIT: "1" + # aarch64 build - label: "Build arm64 CPU wheel" depends_on: ~ @@ -25,7 +40,7 @@ steps: - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_BUILD_ACL=ON --tag vllm-ci:build-image --target vllm-build --progress plain -f docker/Dockerfile.cpu ." - "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - - "bash .buildkite/scripts/upload-wheels.sh" + - "bash .buildkite/scripts/upload-wheels.sh manylinux_2_35" env: DOCKER_BUILDKIT: "1" @@ -39,7 +54,7 @@ steps: - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." - "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - - "bash .buildkite/scripts/upload-wheels.sh" + - "bash .buildkite/scripts/upload-wheels.sh manylinux_2_31" env: DOCKER_BUILDKIT: "1" @@ -52,7 +67,7 @@ steps: - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." - "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - - "bash .buildkite/scripts/upload-wheels.sh" + - "bash .buildkite/scripts/upload-wheels.sh manylinux_2_35" env: DOCKER_BUILDKIT: "1" diff --git a/.buildkite/scripts/generate-nightly-index.py b/.buildkite/scripts/generate-nightly-index.py index f10cb2f0b6e21..d0965fbd56405 100644 --- a/.buildkite/scripts/generate-nightly-index.py +++ b/.buildkite/scripts/generate-nightly-index.py @@ -372,6 +372,17 @@ if __name__ == "__main__": print(f"Found {len(wheel_files)} wheel files for version {version}: {wheel_files}") + # keep only "official" files for a non-nightly version (specifed by cli args) + PY_VERSION_RE = re.compile(r"^\d+\.\d+\.\d+([a-zA-Z0-9.+-]*)?$") + if PY_VERSION_RE.match(version): + # upload-wheels.sh ensures no "dev" is in args.version + wheel_files = list( + filter(lambda x: version in x and "dev" not in x, wheel_files) + ) + print(f"Non-nightly version detected, wheel files used: {wheel_files}") + else: + print("Nightly version detected, keeping all wheel files.") + # Generate index and metadata, assuming wheels and indices are stored as: # s3://vllm-wheels/{version}/ # s3://vllm-wheels// diff --git a/.buildkite/scripts/upload-wheels.sh b/.buildkite/scripts/upload-wheels.sh index 8e38ace0bfbc2..3a218a4bb2e6d 100644 --- a/.buildkite/scripts/upload-wheels.sh +++ b/.buildkite/scripts/upload-wheels.sh @@ -34,9 +34,10 @@ if [[ ${#wheel_files[@]} -ne 1 ]]; then fi wheel="${wheel_files[0]}" -# current build image uses ubuntu 20.04, which corresponds to manylinux_2_31 +# default build image uses ubuntu 20.04, which corresponds to manylinux_2_31 +# we also accept params as manylinux tag # refer to https://github.com/mayeut/pep600_compliance?tab=readme-ov-file#acceptable-distros-to-build-wheels -manylinux_version="manylinux_2_31" +manylinux_version="${1:-manylinux_2_31}" # Rename 'linux' to the appropriate manylinux version in the wheel filename if [[ "$wheel" != *"linux"* ]]; then @@ -96,8 +97,11 @@ if [[ "$BUILDKITE_BRANCH" == "main" && "$BUILDKITE_PULL_REQUEST" == "false" ]]; aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/nightly/" fi -# copy to // only if it does not have "dev" in the version +# re-generate and copy to // only if it does not have "dev" in the version if [[ "$version" != *"dev"* ]]; then - echo "Uploading indices to overwrite /$pure_version/" + echo "Re-generating indices for /$pure_version/" + rm -rf "$INDICES_OUTPUT_DIR/*" + mkdir -p "$INDICES_OUTPUT_DIR" + $PYTHON .buildkite/scripts/generate-nightly-index.py --version "$pure_version" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "version $pure_version" $alias_arg aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/$pure_version/" fi diff --git a/tests/standalone_tests/python_only_compile.sh b/tests/standalone_tests/python_only_compile.sh index d29b9afcc6fbf..2017e34030d60 100644 --- a/tests/standalone_tests/python_only_compile.sh +++ b/tests/standalone_tests/python_only_compile.sh @@ -3,12 +3,45 @@ # for users who do not have any compilers installed on their system set -e -set -x merge_base_commit=$(git merge-base HEAD origin/main) -echo "Current merge base commit with main: $merge_base_commit" +echo "INFO: current merge base commit with main: $merge_base_commit" git show --oneline -s $merge_base_commit +# test whether the metadata.json url is valid, retry each 3 minutes up to 5 times +# this avoids cumbersome error messages & manual retries in case the precompiled wheel +# for the given commit is still being built in the release pipeline +meta_json_url="https://wheels.vllm.ai/$merge_base_commit/vllm/metadata.json" +echo "INFO: will use metadata.json from $meta_json_url" + +for i in {1..5}; do + echo "Checking metadata.json URL (attempt $i)..." + if curl --fail "$meta_json_url" > metadata.json; then + echo "INFO: metadata.json URL is valid." + # check whether it is valid json by python + if python3 -m json.tool metadata.json; then + echo "INFO: metadata.json is valid JSON. Proceeding with the test." + else + echo "CRITICAL: metadata.json exists but is not valid JSON, please do report in #sig-ci channel!" + exit 1 + fi + break + fi + # failure handling + if [ $i -eq 5 ]; then + echo "ERROR: metadata.json URL is still not valid after 5 attempts." + echo "ERROR: Please check whether the precompiled wheel for commit $merge_base_commit exists." + echo " NOTE: If $merge_base_commit is a new commit on main, maybe try again after its release pipeline finishes." + echo " NOTE: If it fails, please report in #sig-ci channel." + exit 1 + else + echo "WARNING: metadata.json URL is not valid. Retrying in 3 minutes..." + sleep 180 + fi +done + +set -x + cd /vllm-workspace/ # uninstall vllm @@ -29,6 +62,6 @@ python3 -c 'import vllm' # Check if the clangd log file was created if [ ! -f /tmp/changed.file ]; then - echo "changed.file was not created, python only compilation failed" + echo "ERROR: changed.file was not created, python only compilation failed" exit 1 fi