From 4d7231e7743e80078bbc68ccc37b5ba5a1f28bf5 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Fri, 21 Nov 2025 17:40:17 +0800
Subject: [PATCH] Revert #28875 (#29159)

---
 docker/Dockerfile                 | 17 +++++++++++++++++
 docs/deployment/docker.md         |  7 ++++---
 .../installation/gpu.cuda.inc.md  |  5 ++++-
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 709b79e84fbbc..964700e2a43ac 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -56,6 +56,7 @@ ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
 
 # PyTorch provides its own indexes for standard and nightly builds
 ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
+ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly
 
 # PIP supports multiple authentication schemes, including keyring
 # By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to
@@ -97,6 +98,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
 ARG PIP_INDEX_URL UV_INDEX_URL
 ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
 ARG PYTORCH_CUDA_INDEX_BASE_URL
+ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL
 ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
 
 # Activate virtual environment and add uv to PATH
@@ -315,6 +317,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
 ARG PIP_INDEX_URL UV_INDEX_URL
 ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
 ARG PYTORCH_CUDA_INDEX_BASE_URL
+ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL
 ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
 
 # Install uv for faster pip installs
@@ -334,6 +337,20 @@ ENV UV_LINK_MODE=copy
 # or future versions of triton.
 RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
 
+# arm64 (GH200) build follows the practice of "use existing pytorch" build,
+# we need to install torch and torchvision from the nightly builds first,
+# pytorch will not appear as a vLLM dependency in all of the following steps
+# after this step
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+        uv pip install --system \
+            --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
+            "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319" ; \
+        uv pip install --system \
+            --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
+            --pre pytorch_triton==3.3.0+gitab727c40 ; \
+    fi
+
 # Install vllm wheel first, so that torch etc will be installed.
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/uv \
diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md
index 0e636c87f38a4..1c639f3533d47 100644
--- a/docs/deployment/docker.md
+++ b/docs/deployment/docker.md
@@ -82,7 +82,8 @@ DOCKER_BUILDKIT=1 docker build . \
 
 ## Building for Arm64/aarch64
 
-A docker container can be built for aarch64 systems such as the Nvidia Grace-Hopper. At time of this writing, this should be considered **experimental**. Using the flag `--platform "linux/arm64"` will attempt to build for arm64.
+A docker container can be built for aarch64 systems such as the Nvidia Grace-Hopper. At time of this writing, this requires the use
+of PyTorch Nightly and should be considered **experimental**. Using the flag `--platform "linux/arm64"` will attempt to build for arm64.
 
 !!! note
     Multiple modules must be compiled, so this process can take a while. Recommend using `--build-arg max_jobs=` & `--build-arg nvcc_threads=`
@@ -93,6 +94,7 @@ A docker container can be built for aarch64 systems such as the Nvidia Grace-Hop
 
 ```bash
 # Example of building on Nvidia GH200 server. (Memory usage: ~15GB, Build time: ~1475s / ~25 min, Image size: 6.93GB)
+python3 use_existing_torch.py
 DOCKER_BUILDKIT=1 docker build . \
     --file docker/Dockerfile \
     --target vllm-openai \
@@ -100,8 +102,7 @@ A docker container can be built for aarch64 systems such as the Nvidia Grace-Hop
     -t vllm/vllm-gh200-openai:latest \
     --build-arg max_jobs=66 \
     --build-arg nvcc_threads=2 \
-    --build-arg torch_cuda_arch_list="9.0 10.0+PTX" \
-    --build-arg RUN_WHEEL_CHECK=false
+    --build-arg torch_cuda_arch_list="9.0 10.0+PTX"
 ```
 
 !!! note
diff --git a/docs/getting_started/installation/gpu.cuda.inc.md b/docs/getting_started/installation/gpu.cuda.inc.md
index 601d3659af886..b2d0d64a2d355 100644
--- a/docs/getting_started/installation/gpu.cuda.inc.md
+++ b/docs/getting_started/installation/gpu.cuda.inc.md
@@ -158,7 +158,10 @@ uv pip install -e .
 ```
 
 ##### Use an existing PyTorch installation
 
-There are scenarios where the PyTorch dependency cannot be easily installed with `uv`, for example, when building vLLM with non-default PyTorch builds (like nightly or a custom build).
+There are scenarios where the PyTorch dependency cannot be easily installed with `uv`, e.g.:
+
+- Building vLLM with PyTorch nightly or a custom PyTorch build.
+- Building vLLM with aarch64 and CUDA (GH200), where the PyTorch wheels are not available on PyPI. Currently, only the PyTorch nightly has wheels for aarch64 with CUDA. You can run `uv pip install --index-url https://download.pytorch.org/whl/nightly/cu128 torch torchvision torchaudio` to [install PyTorch nightly](https://pytorch.org/get-started/locally/) and then build vLLM on top of it.
 
 To build vLLM using an existing PyTorch installation:
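
Note on the index-URL derivation used in the Dockerfile hunk above: the nightly index suffix is computed from `CUDA_VERSION` by keeping the major.minor components and deleting the dot. A minimal sketch of that shell expression, runnable outside the build (the `CUDA_VERSION` value below is an assumption for illustration; inside the image it is set by the CUDA base image):

```bash
#!/usr/bin/env bash
# Hypothetical CUDA version, for illustration only.
CUDA_VERSION=12.8.1

# Same expression as in the Dockerfile: keep major.minor ("12.8"), drop the dot ("128").
suffix="cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"

echo "$suffix"                                           # prints: cu128
echo "https://download.pytorch.org/whl/nightly/$suffix"  # the resolved nightly index URL
```

The resulting `cu128` suffix matches the local version tag on the pinned `torch==2.8.0.dev20250318+cu128` wheel, which is why the pinned wheel versions and the derived index URL must stay in sync.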