mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 12:19:49 +08:00
Upgrade FlashInfer to v0.3.0 (#24086)
Signed-off-by: Po-Han Huang <pohanh@nvidia.com> Co-authored-by: Simon Mo <simon.mo@hey.com>
This commit is contained in:
parent
94866d7c93
commit
78336a0c3e
@ -375,7 +375,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
|
|||||||
# Install FlashInfer from source
|
# Install FlashInfer from source
|
||||||
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
|
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
|
||||||
# Keep this in sync with "flashinfer" extra in setup.py
|
# Keep this in sync with "flashinfer" extra in setup.py
|
||||||
ARG FLASHINFER_GIT_REF="v0.2.14.post1"
|
ARG FLASHINFER_GIT_REF="v0.3.0"
|
||||||
# Flag to control whether to compile FlashInfer AOT kernels
|
# Flag to control whether to compile FlashInfer AOT kernels
|
||||||
# Set to "true" to enable AOT compilation:
|
# Set to "true" to enable AOT compilation:
|
||||||
# docker build --build-arg FLASHINFER_AOT_COMPILE=true ...
|
# docker build --build-arg FLASHINFER_AOT_COMPILE=true ...
|
||||||
|
|||||||
2
setup.py
2
setup.py
@ -694,7 +694,7 @@ setup(
|
|||||||
"mistral_common[audio]"], # Required for audio processing
|
"mistral_common[audio]"], # Required for audio processing
|
||||||
"video": [], # Kept for backwards compatibility
|
"video": [], # Kept for backwards compatibility
|
||||||
# FlashInfer should be updated together with the Dockerfile
|
# FlashInfer should be updated together with the Dockerfile
|
||||||
"flashinfer": ["flashinfer-python==0.2.14.post1"],
|
"flashinfer": ["flashinfer-python==0.3.0"],
|
||||||
# Optional deps for AMD FP4 quantization support
|
# Optional deps for AMD FP4 quantization support
|
||||||
"petit-kernel": ["petit-kernel"],
|
"petit-kernel": ["petit-kernel"],
|
||||||
},
|
},
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user