diff --git a/docker/Dockerfile b/docker/Dockerfile
index 709b79e84fbb..1b937bbc1225 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -85,7 +85,7 @@ ARG GET_PIP_URL
 RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
     && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
     && apt-get update -y \
-    && apt-get install -y ccache software-properties-common git curl sudo python3-pip \
+    && apt-get install -y ccache software-properties-common git curl sudo python3-pip libibverbs-dev \
     && curl -LsSf https://astral.sh/uv/install.sh | sh \
     && $HOME/.local/bin/uv venv /opt/venv --python ${PYTHON_VERSION} \
     && rm -f /usr/bin/python3 /usr/bin/python3-config /usr/bin/pip \
@@ -224,6 +224,22 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
         python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
     fi
 
+# Install DeepGEMM from source
+ARG DEEPGEMM_GIT_REF
+COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh
+RUN --mount=type=cache,target=/root/.cache/uv \
+    VLLM_DOCKER_BUILD_CONTEXT=1 TORCH_CUDA_ARCH_LIST="9.0a 10.0a" /tmp/install_deepgemm.sh --cuda-version "${CUDA_VERSION}" ${DEEPGEMM_GIT_REF:+--ref "$DEEPGEMM_GIT_REF"} --wheel-dir /tmp/deepgemm/dist
+
+# Ensure the wheel dir exists so later-stage COPY won't fail when DeepGEMM is skipped
+RUN mkdir -p /tmp/deepgemm/dist && touch /tmp/deepgemm/dist/.deepgemm_skipped
+
+COPY tools/ep_kernels/install_python_libraries.sh /tmp/install_python_libraries.sh
+# Install EP kernels(pplx-kernels and DeepEP)
+RUN --mount=type=cache,target=/root/.cache/uv \
+    export TORCH_CUDA_ARCH_LIST='9.0a 10.0a' && \
+    /tmp/install_python_libraries.sh /tmp/ep_kernels_workspace wheel && \
+    find /tmp/ep_kernels_workspace/nvshmem -name '*.a' -delete
+
 # Check the size of the wheel if RUN_WHEEL_CHECK is true
 COPY .buildkite/check-wheel-size.py check-wheel-size.py
 # sync the default value with .buildkite/check-wheel-size.py
@@ -289,7 +305,7 @@ RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \
 RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
     && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
     && apt-get update -y \
-    && apt-get install -y ccache software-properties-common git curl wget sudo vim python3-pip \
+    && apt-get install -y software-properties-common curl sudo python3-pip \
     && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
     && if [ ! -z ${DEADSNAKES_MIRROR_URL} ] ; then \
         if [ ! -z "${DEADSNAKES_GPGKEY_URL}" ] ; then \
@@ -356,36 +372,32 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     . /etc/environment && \
     uv pip list
 
-# Even when we build Flashinfer with AOT mode, there's still
-# some issues w.r.t. JIT compilation. Therefore we need to
-# install build dependencies for JIT compilation.
-# TODO: Remove this once FlashInfer AOT wheel is fixed
-COPY requirements/build.txt requirements/build.txt
+# Install deepgemm wheel that has been built in the `build` stage
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements/build.txt \
+    --mount=type=bind,from=build,source=/tmp/deepgemm/dist,target=/tmp/deepgemm/dist,ro \
+    sh -c 'if ls /tmp/deepgemm/dist/*.whl >/dev/null 2>&1; then \
+        uv pip install --system /tmp/deepgemm/dist/*.whl; \
+    else \
+        echo "No DeepGEMM wheels to install; skipping."; \
+    fi'
+
+# Pytorch now installs NVSHMEM, setting LD_LIBRARY_PATH (https://github.com/pytorch/pytorch/blob/d38164a545b4a4e4e0cf73ce67173f70574890b6/.ci/manywheel/build_cuda.sh#L141C14-L141C36)
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+
+# Install EP kernels wheels (pplx-kernels and DeepEP) that have been built in the `build` stage
+RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system ep_kernels/dist/*.whl --verbose \
     --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
-# Install DeepGEMM from source
-ARG DEEPGEMM_GIT_REF
-COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh
-RUN --mount=type=cache,target=/root/.cache/uv \
-    VLLM_DOCKER_BUILD_CONTEXT=1 TORCH_CUDA_ARCH_LIST="9.0a 10.0a" /tmp/install_deepgemm.sh --cuda-version "${CUDA_VERSION}" ${DEEPGEMM_GIT_REF:+--ref "$DEEPGEMM_GIT_REF"}
-
-COPY tools/install_gdrcopy.sh install_gdrcopy.sh
-RUN set -eux; \
+RUN --mount=type=bind,source=tools/install_gdrcopy.sh,target=/tmp/install_gdrcopy.sh,ro \
+    set -eux; \
     case "${TARGETPLATFORM}" in \
         linux/arm64) UUARCH="aarch64" ;; \
         linux/amd64) UUARCH="x64" ;; \
         *) echo "Unsupported TARGETPLATFORM: ${TARGETPLATFORM}" >&2; exit 1 ;; \
     esac; \
-    ./install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "${GDRCOPY_CUDA_VERSION}" "${UUARCH}"; \
-    rm ./install_gdrcopy.sh
-
-# Install EP kernels(pplx-kernels and DeepEP)
-COPY tools/ep_kernels/install_python_libraries.sh install_python_libraries.sh
-ENV CUDA_HOME=/usr/local/cuda
-RUN export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-9.0a 10.0a+PTX}" \
-    && bash install_python_libraries.sh
+    /tmp/install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "${GDRCOPY_CUDA_VERSION}" "${UUARCH}"
 
 # CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will
 # return to /usr/local/nvidia in 13.0 to allow container providers to mount drivers
@@ -415,6 +427,11 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
 # Use copy mode to avoid hardlink failures with Docker cache mounts
 ENV UV_LINK_MODE=copy
 
+RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
+    && apt-get update -y \
+    && apt-get install -y git
+
 # install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/uv \
     CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
@@ -455,12 +472,11 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
 
-COPY requirements/kv_connectors.txt requirements/kv_connectors.txt
-
 # install additional dependencies for openai api server
 RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=requirements/kv_connectors.txt,target=/tmp/kv_connectors.txt,ro \
     if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then \
-        uv pip install --system -r requirements/kv_connectors.txt; \
+        uv pip install --system -r /tmp/kv_connectors.txt; \
     fi; \
     if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
         BITSANDBYTES_VERSION="0.42.0"; \
diff --git a/docs/assets/contributing/dockerfile-stages-dependency.png b/docs/assets/contributing/dockerfile-stages-dependency.png
index f8c104ba1425..57a33524a516 100644
Binary files a/docs/assets/contributing/dockerfile-stages-dependency.png and b/docs/assets/contributing/dockerfile-stages-dependency.png differ
diff --git a/tools/ep_kernels/install_python_libraries.sh b/tools/ep_kernels/install_python_libraries.sh
index 5ea543f4cb1e..77af3f68a050 100755
--- a/tools/ep_kernels/install_python_libraries.sh
+++ b/tools/ep_kernels/install_python_libraries.sh
@@ -1,94 +1,79 @@
 #!/usr/bin/env bash
 set -ex
 
-# prepare workspace directory
-WORKSPACE=$1
-if [ -z "$WORKSPACE" ]; then
-    export WORKSPACE=$(pwd)/ep_kernels_workspace
-fi
+# usage: ./install_python_libraries.sh [workspace_dir] [mode]
+# mode: "install" (default) → install directly into current Python env
+#       "wheel"             → build wheels into WORKSPACE/dist
 
-if [ ! -d "$WORKSPACE" ]; then
-    mkdir -p $WORKSPACE
-fi
+WORKSPACE=${1:-$(pwd)/ep_kernels_workspace}
+MODE=${2:-install}
+mkdir -p "$WORKSPACE"
+
+WHEEL_DIR="$WORKSPACE/dist"
+mkdir -p "$WHEEL_DIR"
+NVSHMEM_VER=3.3.9
+
+pushd "$WORKSPACE"
 
-# configurable pip command (default: pip3)
-PIP_CMD=${PIP_CMD:-pip3}
 CUDA_HOME=${CUDA_HOME:-/usr/local/cuda}
 
 # install dependencies if not installed
-$PIP_CMD install cmake torch ninja
-
-# build nvshmem
-pushd $WORKSPACE
-mkdir -p nvshmem_src
-wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz
-tar -xvf nvshmem_src_3.2.5-1.txz -C nvshmem_src --strip-components=1
-pushd nvshmem_src
-wget https://github.com/deepseek-ai/DeepEP/raw/main/third-party/nvshmem.patch
-git init
-git apply -vvv nvshmem.patch
-
-# assume CUDA_HOME is set correctly
-if [ -z "$CUDA_HOME" ]; then
-    echo "CUDA_HOME is not set, please set it to your CUDA installation directory."
-    exit 1
+if [ -z "$VIRTUAL_ENV" ]; then
+    uv pip install --system cmake torch ninja
+else
+    uv pip install cmake torch ninja
 fi
 
-# assume TORCH_CUDA_ARCH_LIST is set correctly
-if [ -z "$TORCH_CUDA_ARCH_LIST" ]; then
-    echo "TORCH_CUDA_ARCH_LIST is not set, please set it to your desired architecture."
+# fetch nvshmem
+ARCH=$(uname -m)
+case "${ARCH,,}" in
+  x86_64|amd64)
+    NVSHMEM_SUBDIR="linux-x86_64"
+    NVSHMEM_FILE="libnvshmem-linux-x86_64-${NVSHMEM_VER}_cuda12-archive.tar.xz"
+    ;;
+  aarch64|arm64)
+    NVSHMEM_SUBDIR="linux-sbsa"
+    NVSHMEM_FILE="libnvshmem-linux-sbsa-${NVSHMEM_VER}_cuda12-archive.tar.xz"
+    ;;
+  *)
+    echo "Unsupported architecture: ${ARCH}" >&2
     exit 1
-fi
+    ;;
+esac
 
-# disable all features except IBGDA
-export NVSHMEM_IBGDA_SUPPORT=1
-
-export NVSHMEM_SHMEM_SUPPORT=0
-export NVSHMEM_UCX_SUPPORT=0
-export NVSHMEM_USE_NCCL=0
-export NVSHMEM_PMIX_SUPPORT=0
-export NVSHMEM_TIMEOUT_DEVICE_POLLING=0
-export NVSHMEM_USE_GDRCOPY=0
-export NVSHMEM_IBRC_SUPPORT=0
-export NVSHMEM_BUILD_TESTS=0
-export NVSHMEM_BUILD_EXAMPLES=0
-export NVSHMEM_MPI_SUPPORT=0
-export NVSHMEM_BUILD_HYDRA_LAUNCHER=0
-export NVSHMEM_BUILD_TXZ_PACKAGE=0
-export NVSHMEM_TIMEOUT_DEVICE_POLLING=0
-
-cmake -G Ninja -S . -B $WORKSPACE/nvshmem_build/ -DCMAKE_INSTALL_PREFIX=$WORKSPACE/nvshmem_install
-cmake --build $WORKSPACE/nvshmem_build/ --target install
+NVSHMEM_URL="https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/${NVSHMEM_SUBDIR}/${NVSHMEM_FILE}"
+pushd "$WORKSPACE"
+echo "Downloading NVSHMEM ${NVSHMEM_VER} for ${NVSHMEM_SUBDIR} ..."
+curl -fSL "${NVSHMEM_URL}" -o "${NVSHMEM_FILE}"
+tar -xf "${NVSHMEM_FILE}"
+mv "${NVSHMEM_FILE%.tar.xz}" nvshmem
+rm -f "${NVSHMEM_FILE}"
+rm -rf nvshmem/lib/bin nvshmem/lib/share
 popd
 
-export CMAKE_PREFIX_PATH=$WORKSPACE/nvshmem_install:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=$WORKSPACE/nvshmem/lib/cmake:$CMAKE_PREFIX_PATH
 
 is_git_dirty() {
     local dir=$1
     pushd "$dir" > /dev/null
-
-    if [ -d ".git" ] && [ -n "$(git status --porcelain 2>/dev/null)" ]; then
+    if [ -d ".git" ] && [ -n "$(git status --porcelain 2>/dev/null)" ]; then
         popd > /dev/null
-        return 0  # dirty (true)
+        return 0
     else
         popd > /dev/null
-        return 1  # clean (false)
+        return 1
     fi
 }
 
-# Function to handle git repository cloning with dirty/incomplete checks
 clone_repo() {
     local repo_url=$1
     local dir_name=$2
     local key_file=$3
     local commit_hash=$4
-
     if [ -d "$dir_name" ]; then
-        # Check if directory has uncommitted changes (dirty)
         if is_git_dirty "$dir_name"; then
             echo "$dir_name directory is dirty, skipping clone"
-        # Check if clone failed (directory exists but not a valid git repo or missing key files)
         elif [ ! -d "$dir_name/.git" ] || [ ! -f "$dir_name/$key_file" ]; then
             echo "$dir_name directory exists but clone appears incomplete, cleaning up and re-cloning"
             rm -rf "$dir_name"
@@ -99,7 +84,7 @@ clone_repo() {
                 cd ..
             fi
         else
-            echo "$dir_name directory exists and appears complete; manually update if needed"
+            echo "$dir_name directory exists and appears complete"
         fi
     else
         git clone "$repo_url"
@@ -111,17 +96,44 @@ clone_repo() {
     fi
 }
 
-# build and install pplx, require pytorch installed
-pushd $WORKSPACE
-clone_repo "https://github.com/ppl-ai/pplx-kernels" "pplx-kernels" "setup.py" "c336faf"
-cd pplx-kernels
-$PIP_CMD install --no-build-isolation -vvv -e .
-popd
+do_build() {
+    local repo=$1
+    local name=$2
+    local key=$3
+    local commit=$4
+    local extra_env=$5
 
-# build and install deepep, require pytorch installed
-pushd $WORKSPACE
-clone_repo "https://github.com/deepseek-ai/DeepEP" "DeepEP" "setup.py" "73b6ea4"
-cd DeepEP
-export NVSHMEM_DIR=$WORKSPACE/nvshmem_install
-$PIP_CMD install --no-build-isolation -vvv -e .
-popd
+    pushd "$WORKSPACE"
+    clone_repo "$repo" "$name" "$key" "$commit"
+    cd "$name"
+
+    if [ "$MODE" = "install" ]; then
+        echo "Installing $name into environment"
+        eval "$extra_env" uv pip install --no-build-isolation -vvv .
+    else
+        echo "Building $name wheel into $WHEEL_DIR"
+        eval "$extra_env" uv build --wheel --no-build-isolation -vvv --out-dir "$WHEEL_DIR" .
+    fi
+    popd
+}
+
+# build pplx-kernels
+do_build \
+    "https://github.com/ppl-ai/pplx-kernels" \
+    "pplx-kernels" \
+    "setup.py" \
+    "12cecfd" \
+    ""
+
+# build DeepEP
+do_build \
+    "https://github.com/deepseek-ai/DeepEP" \
+    "DeepEP" \
+    "setup.py" \
+    "73b6ea4" \
+    "export NVSHMEM_DIR=$WORKSPACE/nvshmem; "
+
+if [ "$MODE" = "wheel" ]; then
+    echo "All wheels written to $WHEEL_DIR"
+    ls -l "$WHEEL_DIR"
+fi
diff --git a/tools/install_deepgemm.sh b/tools/install_deepgemm.sh
index 4f2cd302c3ef..ee9a5dd4aa64 100755
--- a/tools/install_deepgemm.sh
+++ b/tools/install_deepgemm.sh
@@ -1,12 +1,13 @@
 #!/bin/bash
-# Script to install DeepGEMM from source
-# This script can be used both in Docker builds and by users locally
-
+# Script to build and/or install DeepGEMM from source
+# Default: build and install immediately
+# Optional: build wheels to a directory for later installation (useful in multi-stage builds)
 set -e
 
 # Default values
 DEEPGEMM_GIT_REPO="https://github.com/deepseek-ai/DeepGEMM.git"
 DEEPGEMM_GIT_REF="594953acce41793ae00a1233eb516044d604bcb6"
+WHEEL_DIR=""
 
 # Parse command line arguments
 while [[ $# -gt 0 ]]; do
@@ -27,11 +28,20 @@ while [[ $# -gt 0 ]]; do
             CUDA_VERSION="$2"
             shift 2
             ;;
+        --wheel-dir)
+            if [[ -z "$2" || "$2" =~ ^- ]]; then
+                echo "Error: --wheel-dir requires a directory path." >&2
+                exit 1
+            fi
+            WHEEL_DIR="$2"
+            shift 2
+            ;;
         -h|--help)
            echo "Usage: $0 [OPTIONS]"
            echo "Options:"
            echo "  --ref REF            Git reference to checkout (default: $DEEPGEMM_GIT_REF)"
            echo "  --cuda-version VER   CUDA version (auto-detected if not provided)"
+           echo "  --wheel-dir PATH     If set, build wheel into PATH but do not install"
            echo "  -h, --help           Show this help message"
            exit 0
            ;;
@@ -57,16 +67,15 @@ fi
 CUDA_MAJOR="${CUDA_VERSION%%.*}"
 CUDA_MINOR="${CUDA_VERSION#${CUDA_MAJOR}.}"
 CUDA_MINOR="${CUDA_MINOR%%.*}"
-
 echo "CUDA version: $CUDA_VERSION (major: $CUDA_MAJOR, minor: $CUDA_MINOR)"
 
 # Check CUDA version requirement
 if [ "$CUDA_MAJOR" -lt 12 ] || { [ "$CUDA_MAJOR" -eq 12 ] && [ "$CUDA_MINOR" -lt 8 ]; }; then
-    echo "Skipping DeepGEMM installation (requires CUDA 12.8+ but got ${CUDA_VERSION})"
+    echo "Skipping DeepGEMM build/installation (requires CUDA 12.8+ but got ${CUDA_VERSION})"
     exit 0
 fi
 
-echo "Installing DeepGEMM from source..."
+echo "Preparing DeepGEMM build..."
 echo "Repository: $DEEPGEMM_GIT_REPO"
 echo "Reference: $DEEPGEMM_GIT_REF"
 
@@ -76,23 +85,31 @@ trap 'rm -rf "$INSTALL_DIR"' EXIT
 
 # Clone the repository
 git clone --recursive --shallow-submodules "$DEEPGEMM_GIT_REPO" "$INSTALL_DIR/deepgemm"
-
-echo "🏗️ Building DeepGEMM"
 pushd "$INSTALL_DIR/deepgemm"
 
 # Checkout the specific reference
 git checkout "$DEEPGEMM_GIT_REF"
 
-# Build DeepGEMM
+# Clean previous build artifacts
 # (Based on https://github.com/deepseek-ai/DeepGEMM/blob/main/install.sh)
-rm -rf build dist
-rm -rf *.egg-info
+rm -rf build dist *.egg-info
+
+# Build wheel
+echo "🏗️ Building DeepGEMM wheel..."
 python3 setup.py bdist_wheel
 
-# Install the wheel
+# If --wheel-dir was specified, copy wheels there and exit
+if [ -n "$WHEEL_DIR" ]; then
+    mkdir -p "$WHEEL_DIR"
+    cp dist/*.whl "$WHEEL_DIR"/
+    echo "✅ Wheel built and copied to $WHEEL_DIR"
+    popd
+    exit 0
+fi
+
+# Default behaviour: install built wheel
 if command -v uv >/dev/null 2>&1; then
     echo "Installing DeepGEMM wheel using uv..."
-    # Use --system in Docker contexts, respect user's environment otherwise
     if [ -n "$VLLM_DOCKER_BUILD_CONTEXT" ]; then
         uv pip install --system dist/*.whl
     else
@@ -104,5 +121,4 @@ else
 fi
 
 popd
-
 echo "✅ DeepGEMM installation completed successfully"