From 3e2f37a69a659e8585f84d61afd4623525d70b20 Mon Sep 17 00:00:00 2001
From: "Md. Shafi Hussain"
Date: Tue, 25 Mar 2025 15:45:14 +0530
Subject: [PATCH] Dockerfile.ppc64le changes to move to UBI (#15402)

Signed-off-by: Md. Shafi Hussain
---
 Dockerfile.ppc64le   | 272 +++++++++++++++++++++++++++++++++++++++----
 requirements/cpu.txt |   6 +-
 2 files changed, 254 insertions(+), 24 deletions(-)

diff --git a/Dockerfile.ppc64le b/Dockerfile.ppc64le
index c5ca20d76e3e..913c289adc01 100644
--- a/Dockerfile.ppc64le
+++ b/Dockerfile.ppc64le
@@ -1,37 +1,267 @@
-FROM mambaorg/micromamba
-ARG MAMBA_DOCKERFILE_ACTIVATE=1
-USER root
+ARG BASE_UBI_IMAGE_TAG=9.5-1741850109
 
-ENV PATH="/usr/local/cargo/bin:$PATH:/opt/conda/bin/"
+###############################################################
+# base stage with basic dependencies
+###############################################################
 
-RUN apt-get update -y && apt-get install -y git wget kmod curl vim libnuma-dev libsndfile-dev libprotobuf-dev build-essential ffmpeg libsm6 libxext6 libgl1 libssl-dev
+FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base-builder
 
-# Some packages in requirements/cpu are installed here
-# IBM provides optimized packages for ppc64le processors in the open-ce project for mamba
-# Currently these may not be available for venv or pip directly
-RUN micromamba install -y -n base -c https://ftp.osuosl.org/pub/open-ce/1.11.0-p10/ -c defaults python=3.10 rust && micromamba clean --all --yes
+ARG PYTHON_VERSION=3.12
+ARG OPENBLAS_VERSION=0.3.29
+
+# Set Environment Variables for venv, cargo & openblas
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH=${VIRTUAL_ENV}/bin:/root/.cargo/bin:$PATH
+ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig/
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib
+ENV UV_LINK_MODE=copy
+
+# install gcc-13, python, rust, openblas
+# Note: A symlink for libatomic.so is created for gcc-13 (linker fails to find libatomic otherwise - reqd. for sentencepiece)
+# Note: A dummy file 'control' is created in /tmp/ to artificially create dependencies between stages when building stages in parallel
+#       when `--jobs=` is passed with podman build command
+RUN microdnf install -y openssl-devel dnf \
+    && dnf install -y https://mirror.stream.centos.org/9-stream/BaseOS/`arch`/os/Packages/centos-gpg-keys-9.0-24.el9.noarch.rpm \
+                      https://mirror.stream.centos.org/9-stream/BaseOS/`arch`/os/Packages/centos-stream-repos-9.0-24.el9.noarch.rpm \
+                      https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm \
+    && dnf config-manager --add-repo https://mirror.stream.centos.org/9-stream/BaseOS/`arch`/os \
+    && dnf config-manager --add-repo https://mirror.stream.centos.org/9-stream/AppStream/`arch`/os \
+    && dnf config-manager --set-enabled crb \
+    && dnf install -y \
+       git tar gcc-toolset-13 automake libtool numactl-devel lapack-devel \
+       pkgconfig xsimd zeromq-devel kmod findutils protobuf* \
+       libtiff-devel libjpeg-devel openjpeg2-devel zlib-devel \
+       freetype-devel lcms2-devel libwebp-devel tcl-devel tk-devel \
+       harfbuzz-devel fribidi-devel libraqm-devel libimagequant-devel libxcb-devel \
+       python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip \
+    && dnf clean all \
+    && ln -sf /usr/lib64/libatomic.so.1 /usr/lib64/libatomic.so \
+    && python${PYTHON_VERSION} -m venv ${VIRTUAL_ENV} \
+    && python -m pip install -U pip uv \
+    && uv pip install wheel build "setuptools<70" setuptools_scm setuptools_rust meson-python cmake ninja cython scikit_build_core scikit_build \
+    && curl -sL https://ftp2.osuosl.org/pub/ppc64el/openblas/latest/Openblas_${OPENBLAS_VERSION}_ppc64le.tar.gz | tar xvf - -C /usr/local \
+    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
+    && cd /tmp && touch control
+
+###############################################################
+# Stage to build torch family
+###############################################################
+
+FROM base-builder AS torch-builder
+
+ARG MAX_JOBS
+ARG TORCH_VERSION=2.6.0
+ARG _GLIBCXX_USE_CXX11_ABI=1
+RUN --mount=type=cache,target=/root/.cache/uv \
+    source /opt/rh/gcc-toolset-13/enable && \
+    git clone --recursive https://github.com/pytorch/pytorch.git -b v${TORCH_VERSION} && \
+    cd pytorch && \
+    uv pip install -r requirements.txt && \
+    python setup.py develop && \
+    rm -f dist/torch*+git*whl && \
+    MAX_JOBS=${MAX_JOBS:-$(nproc)} \
+       PYTORCH_BUILD_VERSION=${TORCH_VERSION} PYTORCH_BUILD_NUMBER=1 uv build --wheel --out-dir /torchwheels/
+
+ARG TORCHVISION_VERSION=0.21.0
+ARG TORCHVISION_USE_NVJPEG=0
+ARG TORCHVISION_USE_FFMPEG=0
+RUN --mount=type=cache,target=/root/.cache/uv \
+    source /opt/rh/gcc-toolset-13/enable && \
+    git clone --recursive https://github.com/pytorch/vision.git -b v${TORCHVISION_VERSION} && \
+    cd vision && \
+    MAX_JOBS=${MAX_JOBS:-$(nproc)} \
+       BUILD_VERSION=${TORCHVISION_VERSION} \
+       uv build --wheel --out-dir /torchwheels/ --no-build-isolation
+
+ARG TORCHAUDIO_VERSION=2.6.0
+ARG BUILD_SOX=1
+ARG BUILD_KALDI=1
+ARG BUILD_RNNT=1
+ARG USE_FFMPEG=0
+ARG USE_ROCM=0
+ARG USE_CUDA=0
+ARG TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_FFMPEG=1
+RUN --mount=type=cache,target=/root/.cache/uv \
+    source /opt/rh/gcc-toolset-13/enable && \
+    git clone --recursive https://github.com/pytorch/audio.git -b v${TORCHAUDIO_VERSION} && \
+    cd audio && \
+    MAX_JOBS=${MAX_JOBS:-$(nproc)} \
+       BUILD_VERSION=${TORCHAUDIO_VERSION} \
+       uv build --wheel --out-dir /torchwheels/ --no-build-isolation
+
+###############################################################
+# Stage to build pyarrow
+###############################################################
+
+FROM base-builder AS arrow-builder
+
+ARG MAX_JOBS
+ARG PYARROW_PARALLEL
+ARG PYARROW_VERSION=19.0.1
+RUN --mount=type=cache,target=/root/.cache/uv \
+    source /opt/rh/gcc-toolset-13/enable && \
+    git clone --recursive https://github.com/apache/arrow.git -b apache-arrow-${PYARROW_VERSION} && \
+    cd arrow/cpp && \
+    mkdir build && cd build && \
+    cmake -DCMAKE_BUILD_TYPE=release \
+        -DCMAKE_INSTALL_PREFIX=/usr/local \
+        -DARROW_PYTHON=ON \
+        -DARROW_BUILD_TESTS=OFF \
+        -DARROW_JEMALLOC=ON \
+        -DARROW_BUILD_STATIC="OFF" \
+        -DARROW_PARQUET=ON \
+        .. && \
+    make install -j ${MAX_JOBS:-$(nproc)} && \
+    cd ../../python/ && \
+    uv pip install -v -r requirements-wheel-build.txt && \
+    PYARROW_PARALLEL=${PYARROW_PARALLEL:-$(nproc)} \
+       python setup.py build_ext \
+       --build-type=release --bundle-arrow-cpp \
+       bdist_wheel --dist-dir /arrowwheels/
+
+###############################################################
+# Stage to build opencv
+###############################################################
+
+FROM base-builder AS cv-builder
+
+ARG MAX_JOBS
+ARG OPENCV_VERSION=84
+ARG ENABLE_HEADLESS=1
+RUN --mount=type=cache,target=/root/.cache/uv \
+    source /opt/rh/gcc-toolset-13/enable && \
+    git clone --recursive https://github.com/opencv/opencv-python.git -b ${OPENCV_VERSION} && \
+    cd opencv-python && \
+    sed -i 's/"setuptools==59.2.0",/"setuptools<70.0",/g' pyproject.toml && \
+    python -m build --wheel --installer=uv --outdir /opencvwheels/
+
+###############################################################
+# Stage to build vllm - this stage builds and installs
+# vllm, tensorizer and vllm-tgis-adapter and builds uv cache
+# for transitive dependencies - eg. grpcio
+###############################################################
+
+FROM base-builder AS vllmcache-builder
+
+COPY --from=torch-builder /tmp/control /dev/null
+COPY --from=arrow-builder /tmp/control /dev/null
+COPY --from=cv-builder /tmp/control /dev/null
+
+ARG VLLM_TARGET_DEVICE=cpu
+
+# this step installs vllm and populates uv cache
+# with all the transitive dependencies
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,from=torch-builder,source=/torchwheels/,target=/torchwheels/,ro \
+    --mount=type=bind,from=arrow-builder,source=/arrowwheels/,target=/arrowwheels/,ro \
+    --mount=type=bind,from=cv-builder,source=/opencvwheels/,target=/opencvwheels/,ro \
+    --mount=type=bind,src=.,dst=/src/,rw \
+    source /opt/rh/gcc-toolset-13/enable && \
+    uv pip install /opencvwheels/*.whl /arrowwheels/*.whl /torchwheels/*.whl && \
+    sed -i -e 's/.*torch.*//g' /src/pyproject.toml /src/requirements/*.txt && \
+    uv pip install pandas pythran pybind11 && \
+    # sentencepiece.pc is in some pkgconfig inside uv cache
+    export PKG_CONFIG_PATH=$(find / -type d -name "pkgconfig" 2>/dev/null | tr '\n' ':') && \
+    uv pip install -r /src/requirements/common.txt -r /src/requirements/cpu.txt -r /src/requirements/build.txt --no-build-isolation && \
+    cd /src/ && \
+    uv build --wheel --out-dir /vllmwheel/ --no-build-isolation && \
+    uv pip install /vllmwheel/*.whl
+
+
+###############################################################
+# Stage to build numactl
+###############################################################
+
+FROM base-builder AS numa-builder
+
+# Note: Building numactl with gcc-11. Compiling with gcc-13 in this builder stage will
+# trigger recompilation with gcc-11 (and require libtool) in the final stage where we do not have gcc-13
+ARG MAX_JOBS
+ARG NUMACTL_VERSION=2.0.19
+RUN git clone --recursive https://github.com/numactl/numactl.git -b v${NUMACTL_VERSION} \
+    && cd numactl \
+    && autoreconf -i && ./configure \
+    && make -j ${MAX_JOBS:-$(nproc)}
+
+###############################################################
+# Stage to build lapack
+###############################################################
+
+FROM base-builder AS lapack-builder
+
+ARG MAX_JOBS
+ARG LAPACK_VERSION=3.12.1
+RUN git clone --recursive https://github.com/Reference-LAPACK/lapack.git -b v${LAPACK_VERSION} \
+    && cd lapack && source /opt/rh/gcc-toolset-13/enable \
+    && cmake -B build -S . \
+    && cmake --build build -j ${MAX_JOBS:-$(nproc)}
+
+
+###############################################################
+#                  FINAL VLLM IMAGE STAGE                     #
+###############################################################
+
+FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS vllm-openai
+
+ARG PYTHON_VERSION=3.12
+ARG OPENBLAS_VERSION=0.3.29
+
+# Set Environment Variables for venv & openblas
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH=${VIRTUAL_ENV}/bin:$PATH
+ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig/
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib
+ENV UV_LINK_MODE=copy
+
+# create artificial dependencies between stages for independent stages to build in parallel
+COPY --from=torch-builder /tmp/control /dev/null
+COPY --from=arrow-builder /tmp/control /dev/null
+COPY --from=cv-builder /tmp/control /dev/null
+COPY --from=vllmcache-builder /tmp/control /dev/null
+COPY --from=numa-builder /tmp/control /dev/null
+COPY --from=lapack-builder /tmp/control /dev/null
+
+# install gcc-11, python, openblas, numactl, lapack
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,from=numa-builder,source=/numactl/,target=/numactl/,rw \
+    --mount=type=bind,from=lapack-builder,source=/lapack/,target=/lapack/,rw \
+    rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
+    microdnf install --nodocs -y \
+    tar findutils openssl \
+    pkgconfig xsimd g++ gcc-fortran libsndfile \
+    libtiff libjpeg openjpeg2 zlib zeromq \
+    freetype lcms2 libwebp tcl tk utf8proc \
+    harfbuzz fribidi libraqm libimagequant libxcb \
+    python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip \
+    && microdnf clean all \
+    && python${PYTHON_VERSION} -m venv ${VIRTUAL_ENV} \
+    && python -m pip install -U pip uv --no-cache \
+    && curl -sL https://ftp2.osuosl.org/pub/ppc64el/openblas/latest/Openblas_${OPENBLAS_VERSION}_ppc64le.tar.gz | tar xvf - -C /usr/local \
+    && make -C /numactl install \
+    && uv pip install cmake \
+    && cmake --install /lapack/build \
+    && uv pip uninstall cmake
+
+# consume previously built wheels (including vllm)
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,from=torch-builder,source=/torchwheels/,target=/torchwheels/,ro \
+    --mount=type=bind,from=arrow-builder,source=/arrowwheels/,target=/arrowwheels/,ro \
+    --mount=type=bind,from=cv-builder,source=/opencvwheels/,target=/opencvwheels/,ro \
+    --mount=type=bind,from=vllmcache-builder,source=/vllmwheel/,target=/vllmwheel/,ro \
+    HOME=/root uv pip install /opencvwheels/*.whl /arrowwheels/*.whl /torchwheels/*.whl /vllmwheel/*.whl
 
 COPY ./ /workspace/vllm
-
 WORKDIR /workspace/vllm
 ARG GIT_REPO_CHECK=0
 RUN --mount=type=bind,source=.git,target=.git \
     if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh; fi
 
-RUN --mount=type=cache,target=/root/.cache/pip \
-    RUSTFLAGS='-L /opt/conda/lib' pip install -v --prefer-binary --extra-index-url https://repo.fury.io/mgiessing \
-    'cmake>=3.26' ninja packaging 'setuptools-scm>=8' wheel jinja2 \
-    -r requirements/cpu.txt \
-    xformers uvloop==0.20.0
-
-RUN --mount=type=bind,source=.git,target=.git \
-    VLLM_TARGET_DEVICE=cpu python3 setup.py install
-
 # install development dependencies (for testing)
-RUN python3 -m pip install -e tests/vllm_test_utils
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install -e tests/vllm_test_utils
 
 WORKDIR /workspace/
 
 RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
 
-ENTRYPOINT ["/opt/conda/bin/python3", "-m", "vllm.entrypoints.openai.api_server"]
+ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]
diff --git a/requirements/cpu.txt b/requirements/cpu.txt
index e4a7f9acdffd..fc09083781e6 100644
--- a/requirements/cpu.txt
+++ b/requirements/cpu.txt
@@ -4,14 +4,14 @@
 # Dependencies for CPUs
 torch==2.6.0+cpu; platform_machine == "x86_64"
 torch==2.6.0; platform_system == "Darwin"
-torch==2.5.1; platform_machine == "ppc64le" or platform_machine == "aarch64"
+torch==2.6.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
 torch==2.7.0.dev20250304; platform_machine == "s390x"
 
 # required for the image processor of minicpm-o-2_6, this must be updated alongside torch
 torchaudio; platform_machine != "ppc64le" and platform_machine != "s390x"
-torchaudio==2.5.1; platform_machine == "ppc64le"
+torchaudio==2.6.0; platform_machine == "ppc64le"
 
 # required for the image processor of phi3v, this must be updated alongside torch
 torchvision; platform_machine != "ppc64le" and platform_machine != "s390x"
-torchvision==0.20.1; platform_machine == "ppc64le"
+torchvision==0.21.0; platform_machine == "ppc64le"
 datasets # for benchmark scripts