mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 19:14:57 +08:00
[Docker] Add cuda arch list as build option (#1950)
This commit is contained in:
parent
2b981012a6
commit
c85b80c2b6
@ -30,11 +30,15 @@ COPY requirements.txt requirements.txt
|
|||||||
COPY pyproject.toml pyproject.toml
|
COPY pyproject.toml pyproject.toml
|
||||||
COPY vllm/__init__.py vllm/__init__.py
|
COPY vllm/__init__.py vllm/__init__.py
|
||||||
|
|
||||||
|
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
|
||||||
|
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
||||||
# max jobs used by Ninja to build extensions
|
# max jobs used by Ninja to build extensions
|
||||||
ENV MAX_JOBS=$max_jobs
|
ARG max_jobs=2
|
||||||
|
ENV MAX_JOBS=${max_jobs}
|
||||||
# number of threads used by nvcc
|
# number of threads used by nvcc
|
||||||
ARG nvcc_threads=8
|
ARG nvcc_threads=8
|
||||||
ENV NVCC_THREADS=$nvcc_threads
|
ENV NVCC_THREADS=$nvcc_threads
|
||||||
|
|
||||||
RUN python3 setup.py build_ext --inplace
|
RUN python3 setup.py build_ext --inplace
|
||||||
|
|
||||||
# image to run unit testing suite
|
# image to run unit testing suite
|
||||||
|
|||||||
@ -31,6 +31,14 @@ You can build and run vLLM from source via the provided dockerfile. To build vLL
|
|||||||
|
|
||||||
$ DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai # optionally specify: --build-arg max_jobs=8 --build-arg nvcc_threads=2
|
$ DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai # optionally specify: --build-arg max_jobs=8 --build-arg nvcc_threads=2
|
||||||
|
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
By default, vLLM builds for all GPU types for the widest distribution. If you are building only for the
|
||||||
|
current GPU type the machine is running on, you can add the argument ``--build-arg torch_cuda_arch_list=""``
|
||||||
|
so that vLLM detects the current GPU type and builds only for it.
|
||||||
|
|
||||||
|
|
||||||
To run vLLM:
|
To run vLLM:
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user