diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 47866b209e69a..6587b7b512a0a 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -41,37 +41,17 @@ steps: env: DOCKER_BUILDKIT: "1" - - block: "Build FlashInfer wheels" - key: block-build-flashinfer-wheels + - block: "Build FlashInfer wheel" + key: block-build-flashinfer-wheel depends_on: ~ - - label: "Build FlashInfer wheels - CUDA 12.8" - depends_on: block-build-flashinfer-wheels - id: build-flashinfer-wheel-cuda-12-8 + - label: "Build and upload FlashInfer wheel - CUDA 12.8" + depends_on: block-build-flashinfer-wheel + id: build-upload-flashinfer-wheel agents: queue: cpu_queue_postmerge commands: - - "bash .buildkite/scripts/build-flashinfer-wheel.sh 12.8.1" - env: - DOCKER_BUILDKIT: "1" - - - label: "Build FlashInfer wheels - CUDA 12.6" - depends_on: block-build-flashinfer-wheels - id: build-flashinfer-wheel-cuda-12-6 - agents: - queue: cpu_queue_postmerge - commands: - - "bash .buildkite/scripts/build-flashinfer-wheel.sh 12.6.3" - env: - DOCKER_BUILDKIT: "1" - - - label: "Build FlashInfer wheels - CUDA 11.8" - depends_on: block-build-flashinfer-wheels - id: build-flashinfer-wheel-cuda-11-8 - agents: - queue: cpu_queue_postmerge - commands: - - "bash .buildkite/scripts/build-flashinfer-wheel.sh 11.8.0" + - "bash .buildkite/scripts/build-upload-flashinfer-wheel.sh 12.8.1" env: DOCKER_BUILDKIT: "1" diff --git a/.buildkite/scripts/build-flashinfer-wheel.sh b/.buildkite/scripts/build-flashinfer-wheel.sh deleted file mode 100755 index c941bff0fc3e4..0000000000000 --- a/.buildkite/scripts/build-flashinfer-wheel.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash - -set -ex - -CUDA_VERSION="${1:-12.8.1}" -FLASHINFER_VERSION="${FLASHINFER_VERSION:-v0.2.9rc2}" - -echo "Building FlashInfer wheel for CUDA ${CUDA_VERSION} using vLLM Dockerfile" - -# Build the FlashInfer wheel using the existing Dockerfile stage -DOCKER_BUILDKIT=1 docker build \ - --build-arg max_jobs=16 \ - --build-arg USE_SCCACHE=1 \ - --build-arg CUDA_VERSION="${CUDA_VERSION}" \ - --build-arg FLASHINFER_GIT_REF="${FLASHINFER_VERSION}" \ - --tag flashinfer-wheel-builder:${CUDA_VERSION} \ - --target flashinfer-wheel-builder \ - --progress plain \ - -f docker/Dockerfile . - -# Extract the wheel -mkdir -p artifacts/dist -docker run --rm -v $(pwd)/artifacts:/output_host flashinfer-wheel-builder:${CUDA_VERSION} \ - bash -c 'cp /output/*.whl /output_host/dist/ && chmod -R a+rw /output_host' - -# Upload the wheel -bash .buildkite/scripts/upload-flashinfer-wheels.sh - -echo "FlashInfer wheel built and uploaded successfully for CUDA ${CUDA_VERSION}" -ls -la artifacts/dist/ \ No newline at end of file diff --git a/.buildkite/scripts/build-upload-flashinfer-wheel.sh b/.buildkite/scripts/build-upload-flashinfer-wheel.sh new file mode 100755 index 0000000000000..762cbe0484955 --- /dev/null +++ b/.buildkite/scripts/build-upload-flashinfer-wheel.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +set -ex + +CUDA_VERSION="${1:-12.8.1}" +FLASHINFER_VERSION="${FLASHINFER_VERSION:-v0.2.9rc2}" + +echo "Building FlashInfer wheel for CUDA ${CUDA_VERSION} using vLLM Dockerfile" + +# Build the FlashInfer wheel using the existing Dockerfile stage +DOCKER_BUILDKIT=1 docker build \ + --build-arg max_jobs=16 \ + --build-arg USE_SCCACHE=1 \ + --build-arg CUDA_VERSION="${CUDA_VERSION}" \ + --build-arg FLASHINFER_GIT_REF="${FLASHINFER_VERSION}" \ + --tag flashinfer-wheel-builder:${CUDA_VERSION} \ + --target flashinfer-wheel-builder \ + --progress plain \ + -f docker/Dockerfile . + +# Extract the wheel +mkdir -p artifacts/dist +docker run --rm -v $(pwd)/artifacts:/output_host flashinfer-wheel-builder:${CUDA_VERSION} \ + bash -c 'cp /output/*.whl /output_host/dist/ && chmod -R a+rw /output_host' + +# Upload the wheel to S3 +echo "Uploading FlashInfer wheel to S3..." +wheel_files=(artifacts/dist/*.whl) + +# Check that exactly one wheel is found +if [[ ${#wheel_files[@]} -ne 1 ]]; then + echo "Error: Expected exactly one wheel file in artifacts/dist/, but found ${#wheel_files[@]}" + exit 1 +fi + +# Get the single wheel file +wheel="${wheel_files[0]}" +echo "Processing FlashInfer wheel: $wheel" + +# Rename 'linux' to 'manylinux1' in the wheel filename for compatibility +new_wheel="${wheel/linux/manylinux1}" +if [[ "$wheel" != "$new_wheel" ]]; then + mv -- "$wheel" "$new_wheel" + wheel="$new_wheel" + echo "Renamed wheel to: $wheel" +fi + +# Extract the version from the wheel +version=$(unzip -p "$wheel" '**/METADATA' | grep '^Version: ' | cut -d' ' -f2) +wheel_name=$(basename "$wheel") +echo "FlashInfer version: $version" + +# Upload the wheel to S3 under flashinfer directory +aws s3 cp "$wheel" "s3://vllm-wheels/flashinfer/" + +echo "✅ FlashInfer wheel built and uploaded successfully for CUDA ${CUDA_VERSION}" +echo "📦 Wheel: $wheel_name (version $version)" +ls -la artifacts/dist/ \ No newline at end of file diff --git a/.buildkite/scripts/upload-flashinfer-wheels.sh b/.buildkite/scripts/upload-flashinfer-wheels.sh deleted file mode 100755 index 2e98578b7b8e2..0000000000000 --- a/.buildkite/scripts/upload-flashinfer-wheels.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash - -set -ex - -# Assume wheels are in artifacts/dist/*.whl -wheel_files=(artifacts/dist/*.whl) - -# Check that exactly one wheel is found -if [[ ${#wheel_files[@]} -ne 1 ]]; then - echo "Error: Expected exactly one wheel file in artifacts/dist/, but found ${#wheel_files[@]}" - exit 1 -fi - -# Get the single wheel file -wheel="${wheel_files[0]}" - -echo "Processing FlashInfer wheel: $wheel" - -# Rename 'linux' to 'manylinux1' in the wheel filename for compatibility -new_wheel="${wheel/linux/manylinux1}" -if [[ "$wheel" != "$new_wheel" ]]; then - mv -- "$wheel" "$new_wheel" - wheel="$new_wheel" - echo "Renamed wheel to: $wheel" -fi - -# Extract the version from the wheel -version=$(unzip -p "$wheel" '**/METADATA' | grep '^Version: ' | cut -d' ' -f2) -wheel_name=$(basename "$wheel") -echo "FlashInfer version: $version" - -# Upload the wheel to S3 under flashinfer directory -aws s3 cp "$wheel" "s3://vllm-wheels/flashinfer/" - -# Download existing index if it exists, then rebuild it with all wheels -echo "Rebuilding index with all available wheels..." -aws s3 ls s3://vllm-wheels/flashinfer/ --recursive | grep '\.whl$' | awk '{print $4}' | sed 's|flashinfer/||' > wheel_list.txt - -# Generate complete index.html for the package (following pip index pattern) -cat > flashinfer_index.html << 'EOF' - - -