From b15005dc12256f1fc4e4af0d470038864e1a9559 Mon Sep 17 00:00:00 2001 From: mgoin Date: Wed, 30 Jul 2025 12:41:14 -0400 Subject: [PATCH] Simplify! Signed-off-by: mgoin --- .buildkite/release-pipeline.yaml | 32 ++------- .buildkite/scripts/build-flashinfer-wheel.sh | 30 --------- .../scripts/build-upload-flashinfer-wheel.sh | 58 +++++++++++++++++ .../scripts/upload-flashinfer-wheels.sh | 65 ------------------- docker/Dockerfile | 8 +-- ...uild-flashinfer.sh => flashinfer-build.sh} | 0 6 files changed, 68 insertions(+), 125 deletions(-) delete mode 100755 .buildkite/scripts/build-flashinfer-wheel.sh create mode 100755 .buildkite/scripts/build-upload-flashinfer-wheel.sh delete mode 100755 .buildkite/scripts/upload-flashinfer-wheels.sh rename tools/{build-flashinfer.sh => flashinfer-build.sh} (100%) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 47866b209e69a..6587b7b512a0a 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -41,37 +41,17 @@ steps: env: DOCKER_BUILDKIT: "1" - - block: "Build FlashInfer wheels" - key: block-build-flashinfer-wheels + - block: "Build FlashInfer wheel" + key: block-build-flashinfer-wheel depends_on: ~ - - label: "Build FlashInfer wheels - CUDA 12.8" - depends_on: block-build-flashinfer-wheels - id: build-flashinfer-wheel-cuda-12-8 + - label: "Build and upload FlashInfer wheel - CUDA 12.8" + depends_on: block-build-flashinfer-wheel + id: build-upload-flashinfer-wheel agents: queue: cpu_queue_postmerge commands: - - "bash .buildkite/scripts/build-flashinfer-wheel.sh 12.8.1" - env: - DOCKER_BUILDKIT: "1" - - - label: "Build FlashInfer wheels - CUDA 12.6" - depends_on: block-build-flashinfer-wheels - id: build-flashinfer-wheel-cuda-12-6 - agents: - queue: cpu_queue_postmerge - commands: - - "bash .buildkite/scripts/build-flashinfer-wheel.sh 12.6.3" - env: - DOCKER_BUILDKIT: "1" - - - label: "Build FlashInfer wheels - CUDA 11.8" - depends_on: block-build-flashinfer-wheels - id: build-flashinfer-wheel-cuda-11-8 - agents: - queue: cpu_queue_postmerge - commands: - - "bash .buildkite/scripts/build-flashinfer-wheel.sh 11.8.0" + - "bash .buildkite/scripts/build-upload-flashinfer-wheel.sh 12.8.1" env: DOCKER_BUILDKIT: "1" diff --git a/.buildkite/scripts/build-flashinfer-wheel.sh b/.buildkite/scripts/build-flashinfer-wheel.sh deleted file mode 100755 index c941bff0fc3e4..0000000000000 --- a/.buildkite/scripts/build-flashinfer-wheel.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash - -set -ex - -CUDA_VERSION="${1:-12.8.1}" -FLASHINFER_VERSION="${FLASHINFER_VERSION:-v0.2.9rc2}" - -echo "Building FlashInfer wheel for CUDA ${CUDA_VERSION} using vLLM Dockerfile" - -# Build the FlashInfer wheel using the existing Dockerfile stage -DOCKER_BUILDKIT=1 docker build \ - --build-arg max_jobs=16 \ - --build-arg USE_SCCACHE=1 \ - --build-arg CUDA_VERSION="${CUDA_VERSION}" \ - --build-arg FLASHINFER_GIT_REF="${FLASHINFER_VERSION}" \ - --tag flashinfer-wheel-builder:${CUDA_VERSION} \ - --target flashinfer-wheel-builder \ - --progress plain \ - -f docker/Dockerfile . - -# Extract the wheel -mkdir -p artifacts/dist -docker run --rm -v $(pwd)/artifacts:/output_host flashinfer-wheel-builder:${CUDA_VERSION} \ - bash -c 'cp /output/*.whl /output_host/dist/ && chmod -R a+rw /output_host' - -# Upload the wheel -bash .buildkite/scripts/upload-flashinfer-wheels.sh - -echo "FlashInfer wheel built and uploaded successfully for CUDA ${CUDA_VERSION}" -ls -la artifacts/dist/ \ No newline at end of file diff --git a/.buildkite/scripts/build-upload-flashinfer-wheel.sh b/.buildkite/scripts/build-upload-flashinfer-wheel.sh new file mode 100755 index 0000000000000..762cbe0484955 --- /dev/null +++ b/.buildkite/scripts/build-upload-flashinfer-wheel.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +set -ex + +CUDA_VERSION="${1:-12.8.1}" +FLASHINFER_VERSION="${FLASHINFER_VERSION:-v0.2.9rc2}" + +echo "Building FlashInfer wheel for CUDA ${CUDA_VERSION} using vLLM Dockerfile" + +# Build the FlashInfer wheel using the existing Dockerfile stage +DOCKER_BUILDKIT=1 docker build \ + --build-arg max_jobs=16 \ + --build-arg USE_SCCACHE=1 \ + --build-arg CUDA_VERSION="${CUDA_VERSION}" \ + --build-arg FLASHINFER_GIT_REF="${FLASHINFER_VERSION}" \ + --tag flashinfer-wheel-builder:${CUDA_VERSION} \ + --target flashinfer-wheel-builder \ + --progress plain \ + -f docker/Dockerfile . + +# Extract the wheel +mkdir -p artifacts/dist +docker run --rm -v $(pwd)/artifacts:/output_host flashinfer-wheel-builder:${CUDA_VERSION} \ + bash -c 'cp /output/*.whl /output_host/dist/ && chmod -R a+rw /output_host' + +# Upload the wheel to S3 +echo "Uploading FlashInfer wheel to S3..." +wheel_files=(artifacts/dist/*.whl) + +# Check that exactly one wheel is found +if [[ ${#wheel_files[@]} -ne 1 ]]; then + echo "Error: Expected exactly one wheel file in artifacts/dist/, but found ${#wheel_files[@]}" + exit 1 +fi + +# Get the single wheel file +wheel="${wheel_files[0]}" +echo "Processing FlashInfer wheel: $wheel" + +# Rename 'linux' to 'manylinux1' in the wheel filename for compatibility +new_wheel="${wheel/linux/manylinux1}" +if [[ "$wheel" != "$new_wheel" ]]; then + mv -- "$wheel" "$new_wheel" + wheel="$new_wheel" + echo "Renamed wheel to: $wheel" +fi + +# Extract the version from the wheel +version=$(unzip -p "$wheel" '**/METADATA' | grep '^Version: ' | cut -d' ' -f2) +wheel_name=$(basename "$wheel") +echo "FlashInfer version: $version" + +# Upload the wheel to S3 under flashinfer directory +aws s3 cp "$wheel" "s3://vllm-wheels/flashinfer/" + +echo "✅ FlashInfer wheel built and uploaded successfully for CUDA ${CUDA_VERSION}" +echo "📦 Wheel: $wheel_name (version $version)" +ls -la artifacts/dist/ \ No newline at end of file diff --git a/.buildkite/scripts/upload-flashinfer-wheels.sh b/.buildkite/scripts/upload-flashinfer-wheels.sh deleted file mode 100755 index 2e98578b7b8e2..0000000000000 --- a/.buildkite/scripts/upload-flashinfer-wheels.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash - -set -ex - -# Assume wheels are in artifacts/dist/*.whl -wheel_files=(artifacts/dist/*.whl) - -# Check that exactly one wheel is found -if [[ ${#wheel_files[@]} -ne 1 ]]; then - echo "Error: Expected exactly one wheel file in artifacts/dist/, but found ${#wheel_files[@]}" - exit 1 -fi - -# Get the single wheel file -wheel="${wheel_files[0]}" - -echo "Processing FlashInfer wheel: $wheel" - -# Rename 'linux' to 'manylinux1' in the wheel filename for compatibility -new_wheel="${wheel/linux/manylinux1}" -if [[ "$wheel" != "$new_wheel" ]]; then - mv -- "$wheel" "$new_wheel" - wheel="$new_wheel" - echo "Renamed wheel to: $wheel" -fi - -# Extract the version from the wheel -version=$(unzip -p "$wheel" '**/METADATA' | grep '^Version: ' | cut -d' ' -f2) -wheel_name=$(basename "$wheel") -echo "FlashInfer version: $version" - -# Upload the wheel to S3 under flashinfer directory -aws s3 cp "$wheel" "s3://vllm-wheels/flashinfer/" - -# Download existing index if it exists, then rebuild it with all wheels -echo "Rebuilding index with all available wheels..." -aws s3 ls s3://vllm-wheels/flashinfer/ --recursive | grep '\.whl$' | awk '{print $4}' | sed 's|flashinfer/||' > wheel_list.txt - -# Generate complete index.html for the package (following pip index pattern) -cat > flashinfer_index.html << 'EOF' - - -Links for flashinfer-python - -

Links for flashinfer-python

-EOF - -# Add each wheel to the index -while IFS= read -r wheel_file; do - if [[ -n "$wheel_file" ]]; then - echo "$wheel_file
" >> flashinfer_index.html - fi -done < wheel_list.txt - -cat >> flashinfer_index.html << 'EOF' - - -EOF - -aws s3 cp flashinfer_index.html "s3://vllm-wheels/flashinfer/index.html" - -# Clean up -rm -f flashinfer_index.html wheel_list.txt - -echo "Successfully uploaded FlashInfer wheel $wheel_name (version $version)" \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index 9d8c024126fa9..3ce13d52d9b2f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -274,10 +274,10 @@ ARG CUDA_VERSION ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git" ARG FLASHINFER_GIT_REF="v0.2.9rc2" -COPY tools/build-flashinfer.sh /tmp/build-flashinfer.sh +COPY tools/flashinfer-build.sh /tmp/flashinfer-build.sh RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ - BUILD_WHEEL=true /tmp/build-flashinfer.sh + BUILD_WHEEL=true /tmp/flashinfer-build.sh #################### EXTENSION Build IMAGE #################### @@ -406,10 +406,10 @@ ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git" # Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt # We use `--force-reinstall --no-deps` to avoid issues with the existing FlashInfer wheel. ARG FLASHINFER_GIT_REF="v0.2.9rc2" -COPY tools/build-flashinfer.sh /tmp/build-flashinfer.sh +COPY tools/flashinfer-build.sh /tmp/flashinfer-build.sh RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ - /tmp/build-flashinfer.sh + /tmp/flashinfer-build.sh COPY examples examples COPY benchmarks benchmarks COPY ./vllm/collect_env.py . diff --git a/tools/build-flashinfer.sh b/tools/flashinfer-build.sh similarity index 100% rename from tools/build-flashinfer.sh rename to tools/flashinfer-build.sh