From 02f77807169b776346f21b49dc85014dd128236d Mon Sep 17 00:00:00 2001
From: mgoin
Date: Wed, 30 Jul 2025 14:40:00 -0400
Subject: [PATCH] Consolidate args

Signed-off-by: mgoin
---
 .buildkite/scripts/build-upload-flashinfer-wheel.sh | 3 +--
 docker/Dockerfile                                   | 7 +------
 tools/flashinfer-build.sh                           | 5 +++--
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/.buildkite/scripts/build-upload-flashinfer-wheel.sh b/.buildkite/scripts/build-upload-flashinfer-wheel.sh
index 8c7c69be5ddf5..3b5aea4c07a1a 100755
--- a/.buildkite/scripts/build-upload-flashinfer-wheel.sh
+++ b/.buildkite/scripts/build-upload-flashinfer-wheel.sh
@@ -3,7 +3,7 @@
 set -ex
 
 CUDA_VERSION="${1:-12.8.1}"
-FLASHINFER_VERSION="${FLASHINFER_VERSION:-v0.2.9rc2}"
+# FlashInfer version controlled in tools/flashinfer-build.sh
 
 echo "Building FlashInfer wheel for CUDA ${CUDA_VERSION} using vLLM Dockerfile"
 
@@ -12,7 +12,6 @@ DOCKER_BUILDKIT=1 docker build \
     --build-arg max_jobs=16 \
     --build-arg USE_SCCACHE=1 \
     --build-arg CUDA_VERSION="${CUDA_VERSION}" \
-    --build-arg FLASHINFER_GIT_REF="${FLASHINFER_VERSION}" \
     --tag flashinfer-wheel-builder:${CUDA_VERSION} \
     --target flashinfer-wheel-builder \
     --progress plain \
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 3ce13d52d9b2f..e92f20bc9235d 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -271,8 +271,6 @@ RUN if [ "$RUN_WHEEL_CHECK" = "true" ]; then \
 #################### FLASHINFER WHEEL BUILD IMAGE ####################
 FROM base AS flashinfer-wheel-builder
 ARG CUDA_VERSION
-ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
-ARG FLASHINFER_GIT_REF="v0.2.9rc2"
 
 COPY tools/flashinfer-build.sh /tmp/flashinfer-build.sh
 RUN --mount=type=cache,target=/root/.cache/uv \
@@ -402,10 +400,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 # $ # upload the wheel to a public location, e.g. https://wheels.vllm.ai/flashinfer/v0.2.6.post1/flashinfer_python-0.2.6.post1-cp39-abi3-linux_x86_64.whl
 
 # Install FlashInfer from source
-ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
-# Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt
-# We use `--force-reinstall --no-deps` to avoid issues with the existing FlashInfer wheel.
-ARG FLASHINFER_GIT_REF="v0.2.9rc2"
+# Version controlled in tools/flashinfer-build.sh - keep in sync with requirements/cuda.txt
 COPY tools/flashinfer-build.sh /tmp/flashinfer-build.sh
 RUN --mount=type=cache,target=/root/.cache/uv \
     . /etc/environment && \
diff --git a/tools/flashinfer-build.sh b/tools/flashinfer-build.sh
index 4182bc22bf067..a9c4931982548 100755
--- a/tools/flashinfer-build.sh
+++ b/tools/flashinfer-build.sh
@@ -5,8 +5,9 @@ set -ex
 # Build FlashInfer with AOT kernels
 # This script is used by both the Dockerfile and standalone wheel building
 
-FLASHINFER_GIT_REPO="${FLASHINFER_GIT_REPO:-https://github.com/flashinfer-ai/flashinfer.git}"
-FLASHINFER_GIT_REF="${FLASHINFER_GIT_REF:-v0.2.9rc2}"
+# FlashInfer configuration - keep FLASHINFER_GIT_REF in sync with requirements/cuda.txt
+FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
+FLASHINFER_GIT_REF="${FLASHINFER_GIT_REF:-v0.2.9rc2}" # Must match requirements/cuda.txt
 CUDA_VERSION="${CUDA_VERSION:-12.8.1}"
 BUILD_WHEEL="${BUILD_WHEEL:-false}"
 