Add flashinfer_python to CUDA wheel requirements (#21389)

Signed-off-by: mgoin <mgoin64@gmail.com>
2026-03-16 11:47:09 +08:00 · 2025-07-29 15:51:58 -04:00 · 2025-07-29 15:51:58 -04:00 · a33ea28b1b
commit a33ea28b1b
parent 7b49cb1c6b
2 changed files with 5 additions and 1 deletion
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -386,6 +386,8 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist

 # Install FlashInfer from source
 ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
+# Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt
+# We use `--force-reinstall --no-deps` to avoid issues with the existing FlashInfer wheel.
 ARG FLASHINFER_GIT_REF="v0.2.9rc2"
 RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
  . /etc/environment
@@ -408,7 +410,7 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
        TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \
            python3 -m flashinfer.aot
        TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \
-            uv pip install --system --no-build-isolation .
+            uv pip install --system --no-build-isolation --force-reinstall --no-deps .
    popd
    rm -rf flashinfer
 BASH
--- a/requirements/cuda.txt
+++ b/requirements/cuda.txt
@@ -12,3 +12,5 @@ torchaudio==2.7.1
 torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
 # https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
 xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch >= 2.7
+# FlashInfer should be updated together with the Dockerfile
+flashinfer_python==0.2.9rc2