mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-04 18:09:08 +08:00
[CI] Add SM120 to the Dockerfile (#19794)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
2d7620c3eb
commit
296ce95d8e
@ -77,7 +77,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
|||||||
# can be useful for both `dev` and `test`
|
# can be useful for both `dev` and `test`
|
||||||
# explicitly set the list to avoid issues with torch 2.2
|
# explicitly set the list to avoid issues with torch 2.2
|
||||||
# see https://github.com/pytorch/pytorch/pull/123243
|
# see https://github.com/pytorch/pytorch/pull/123243
|
||||||
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0+PTX'
|
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
|
||||||
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
||||||
# Override the arch list for flash-attn to reduce the binary size
|
# Override the arch list for flash-attn to reduce the binary size
|
||||||
ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
|
ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
|
||||||
@ -244,7 +244,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
|
|||||||
|
|
||||||
# If we need to build FlashInfer wheel before its release:
|
# If we need to build FlashInfer wheel before its release:
|
||||||
# $ # Note we remove 7.0 from the arch list compared to the list below, since FlashInfer only supports sm75+
|
# $ # Note we remove 7.0 from the arch list compared to the list below, since FlashInfer only supports sm75+
|
||||||
# $ export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a'
|
# $ export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a 12.0'
|
||||||
# $ git clone https://github.com/flashinfer-ai/flashinfer.git --recursive
|
# $ git clone https://github.com/flashinfer-ai/flashinfer.git --recursive
|
||||||
# $ cd flashinfer
|
# $ cd flashinfer
|
||||||
# $ git checkout v0.2.6.post1
|
# $ git checkout v0.2.6.post1
|
||||||
@ -261,7 +261,7 @@ if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
|
|||||||
if [[ "$CUDA_VERSION" == 12.8* ]]; then \
|
if [[ "$CUDA_VERSION" == 12.8* ]]; then \
|
||||||
uv pip install --system https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl; \
|
uv pip install --system https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl; \
|
||||||
else \
|
else \
|
||||||
export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a' && \
|
export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a 12.0' && \
|
||||||
git clone https://github.com/flashinfer-ai/flashinfer.git --single-branch --branch v0.2.6.post1 --recursive && \
|
git clone https://github.com/flashinfer-ai/flashinfer.git --single-branch --branch v0.2.6.post1 --recursive && \
|
||||||
# Needed to build AOT kernels
|
# Needed to build AOT kernels
|
||||||
(cd flashinfer && \
|
(cd flashinfer && \
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user