From 78336a0c3ee4eb9dba6e37959d926160e91623fd Mon Sep 17 00:00:00 2001 From: "Po-Han Huang (NVIDIA)" <53919306+nvpohanh@users.noreply.github.com> Date: Fri, 5 Sep 2025 00:49:20 +0800 Subject: [PATCH] Upgrade FlashInfer to v0.3.0 (#24086) Signed-off-by: Po-Han Huang Co-authored-by: Simon Mo --- docker/Dockerfile | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 6f8ca30ffd31b..b78d7d88f1f83 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -375,7 +375,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist # Install FlashInfer from source ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git" # Keep this in sync with "flashinfer" extra in setup.py -ARG FLASHINFER_GIT_REF="v0.2.14.post1" +ARG FLASHINFER_GIT_REF="v0.3.0" # Flag to control whether to compile FlashInfer AOT kernels # Set to "true" to enable AOT compilation: # docker build --build-arg FLASHINFER_AOT_COMPILE=true ... diff --git a/setup.py b/setup.py index ffe8ec4e79af7..872696b250849 100644 --- a/setup.py +++ b/setup.py @@ -694,7 +694,7 @@ setup( "mistral_common[audio]"], # Required for audio processing "video": [], # Kept for backwards compatibility # FlashInfer should be updated together with the Dockerfile - "flashinfer": ["flashinfer-python==0.2.14.post1"], + "flashinfer": ["flashinfer-python==0.3.0"], # Optional deps for AMD FP4 quantization support "petit-kernel": ["petit-kernel"], },