mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 02:35:24 +08:00
[Docker] Adding number of nvcc_threads during build as envar (#1893)
This commit is contained in:
parent
42c02f5892
commit
24f60a54f4
@ -32,6 +32,9 @@ COPY vllm/__init__.py vllm/__init__.py
|
||||
|
||||
# max jobs used by Ninja to build extensions
|
||||
ENV MAX_JOBS=$max_jobs
|
||||
# number of threads used by nvcc
|
||||
ARG nvcc_threads=8
|
||||
ENV NVCC_THREADS=$nvcc_threads
|
||||
RUN python3 setup.py build_ext --inplace
|
||||
|
||||
# image to run unit testing suite
|
||||
|
||||
@ -29,7 +29,7 @@ You can build and run vLLM from source via the provided dockerfile. To build vLL
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai --build-arg max_jobs=8
|
||||
$ DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai # optionally specify: --build-arg max_jobs=8 --build-arg nvcc_threads=2
|
||||
|
||||
To run vLLM:
|
||||
|
||||
|
||||
3
setup.py
3
setup.py
@ -138,7 +138,8 @@ for capability in compute_capabilities:
|
||||
|
||||
# Use NVCC threads to parallelize the build.
|
||||
if nvcc_cuda_version >= Version("11.2"):
|
||||
num_threads = min(os.cpu_count(), 8)
|
||||
# Number of nvcc threads requested via the NVCC_THREADS environment variable,
# falling back to 8 when it is unset. The fallback must be the second argument
# of os.getenv(); passed as the second argument of int() it would be treated
# as the numeric base (octal) and an unset variable would raise TypeError.
nvcc_threads = int(os.getenv("NVCC_THREADS", "8"))
|
||||
num_threads = min(os.cpu_count(), nvcc_threads)
|
||||
NVCC_FLAGS += ["--threads", str(num_threads)]
|
||||
|
||||
ext_modules = []
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user