mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-04 14:35:46 +08:00
Remove from dockerfile
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
721dcb2ebc
commit
f1fd89a9bf
@ -164,7 +164,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
|||||||
# see https://github.com/pytorch/pytorch/pull/123243
|
# see https://github.com/pytorch/pytorch/pull/123243
|
||||||
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
|
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
|
||||||
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
||||||
ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all -compress-mode=size"
|
|
||||||
# Override the arch list for flash-attn to reduce the binary size
|
# Override the arch list for flash-attn to reduce the binary size
|
||||||
ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
|
ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
|
||||||
ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
|
ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user