Upgrade FlashInfer to v0.3.0 (#24086)

Signed-off-by: Po-Han Huang <pohanh@nvidia.com>
Co-authored-by: Simon Mo <simon.mo@hey.com>
2025-12-11 12:19:49 +08:00 · 2025-09-05 00:49:20 +08:00 · 2025-09-05 00:49:20 +08:00 · 78336a0c3e
commit 78336a0c3e
parent 94866d7c93
2 changed files with 2 additions and 2 deletions
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -375,7 +375,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 # Install FlashInfer from source
 ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
 # Keep this in sync with "flashinfer" extra in setup.py
-ARG FLASHINFER_GIT_REF="v0.2.14.post1"
+ARG FLASHINFER_GIT_REF="v0.3.0"
 # Flag to control whether to compile FlashInfer AOT kernels
 # Set to "true" to enable AOT compilation:
 # docker build --build-arg FLASHINFER_AOT_COMPILE=true ...
--- a/setup.py
+++ b/setup.py
@@ -694,7 +694,7 @@ setup(
                  "mistral_common[audio]"],  # Required for audio processing
        "video": [],  # Kept for backwards compatibility
        # FlashInfer should be updated together with the Dockerfile
-        "flashinfer": ["flashinfer-python==0.2.14.post1"],
+        "flashinfer": ["flashinfer-python==0.3.0"],
        # Optional deps for AMD FP4 quantization support
        "petit-kernel": ["petit-kernel"],
    },