Mirror of https://git.datalinker.icu/vllm-project/vllm.git
fix gh200 tests on main (#11246)
Signed-off-by: youkaichao <youkaichao@gmail.com>
parent 88a412ed3d
commit 35bae114a8
@@ -6,8 +6,8 @@ set -ex
 
 # Try building the docker image
 DOCKER_BUILDKIT=1 docker build . \
-  --target test \
-  -platform "linux/arm64" \
+  --target vllm-openai \
+  --platform "linux/arm64" \
   -t gh200-test \
   --build-arg max_jobs=66 \
  --build-arg nvcc_threads=2 \
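The part of the CI script that actually runs the image it just built falls outside this hunk and is not shown in the commit view. Purely as an illustrative sketch (not the script's real test command), a smoke test of the freshly built gh200-test image could look like the following; the --entrypoint "" override and the facebook/opt-125m model are assumptions for the example, since the vllm-openai target's default entrypoint is the OpenAI-compatible API server:

# Hypothetical smoke test of the arm64 image built above (not taken from this diff):
# override the API-server entrypoint and run a short offline generation with the GPUs exposed.
docker run --rm --gpus all --entrypoint "" gh200-test \
  python3 -c "from vllm import LLM; print(LLM('facebook/opt-125m').generate('Hello, world'))"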
@@ -54,16 +54,13 @@ of PyTorch Nightly and should be considered **experimental**. Using the flag `--
     # Example of building on Nvidia GH200 server. (Memory usage: ~12GB, Build time: ~1475s / ~25 min, Image size: 7.26GB)
     $ DOCKER_BUILDKIT=1 sudo docker build . \
     --target vllm-openai \
-    -platform "linux/arm64" \
+    --platform "linux/arm64" \
     -t vllm/vllm-gh200-openai:latest \
     --build-arg max_jobs=66 \
     --build-arg nvcc_threads=2 \
     --build-arg torch_cuda_arch_list="9.0+PTX" \
     --build-arg vllm_fa_cmake_gpu_arches="90-real"
 
-
-
-
 To run vLLM:
 
 .. code-block:: console
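The "To run vLLM:" console block that follows in the documentation lies outside this hunk. For orientation only, the standard Docker run command from the vLLM docs, pointed at the GH200 image tag built above, looks roughly like this; the model name and the Hugging Face token are placeholders, not values taken from this commit:

$ docker run --runtime nvidia --gpus all \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HUGGING_FACE_HUB_TOKEN=<your_hf_token>" \
    -p 8000:8000 \
    --ipc=host \
    vllm/vllm-gh200-openai:latest \
    --model mistralai/Mistral-7B-v0.1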