From 2b1b3dfa4b02456b11b2bdbcd0857ddb96214a71 Mon Sep 17 00:00:00 2001 From: Bhagyashri Date: Fri, 21 Nov 2025 17:54:09 +0530 Subject: [PATCH] Update Dockerfile to use gcc-toolset-14 and fix test case failures on power (ppc64le) (#28957) Signed-off-by: Bhagyashri --- .../hardware_ci/run-cpu-test-ppc64le.sh | 10 +++--- docker/Dockerfile.ppc64le | 32 +++++++++++-------- requirements/common.txt | 4 +-- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh b/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh index 39ea18017308..3728f73fa2a3 100755 --- a/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh +++ b/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh @@ -25,20 +25,22 @@ function cpu_tests() { # offline inference podman exec -it "$container_id" bash -c " + export TORCH_COMPILE_DISABLE=1 set -xve python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m" >> $HOME/test_basic.log # Run basic model test podman exec -it "$container_id" bash -c " + export TORCH_COMPILE_DISABLE=1 set -evx pip install pytest pytest-asyncio einops peft Pillow soundfile transformers_stream_generator matplotlib - pip install sentence-transformers datamodel_code_generator + pip install sentence-transformers datamodel_code_generator tblib # Note: disable Bart until supports V1 # pytest -v -s tests/models/language/generation/test_bart.py -m cpu_model - pytest -v -s tests/models/language/generation/test_common.py::test_models[False-5-32-openai-community/gpt2] - pytest -v -s tests/models/language/generation/test_common.py::test_models[False-5-32-facebook/opt-125m] - pytest -v -s tests/models/language/generation/test_common.py::test_models[False-5-32-google/gemma-1.1-2b-it] + pytest -v -s tests/models/language/generation/test_common.py::test_models[False-False-5-32-openai-community/gpt2] + pytest -v -s tests/models/language/generation/test_common.py::test_models[False-False-5-32-facebook/opt-125m] + pytest -v -s tests/models/language/generation/test_common.py::test_models[False-False-5-32-google/gemma-1.1-2b-it] pytest -v -s tests/models/language/pooling/test_classification.py::test_models[float-jason9693/Qwen2.5-1.5B-apeach] # TODO: Below test case tests/models/language/pooling/test_embedding.py::test_models[True-ssmits/Qwen2-7B-Instruct-embed-base] fails on ppc64le. Disabling it for time being. # pytest -v -s tests/models/language/pooling/test_embedding.py -m cpu_model" >> $HOME/test_rest.log diff --git a/docker/Dockerfile.ppc64le b/docker/Dockerfile.ppc64le index ad9eae94b83d..b16bea3607d2 100644 --- a/docker/Dockerfile.ppc64le +++ b/docker/Dockerfile.ppc64le @@ -8,8 +8,8 @@ FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS openbl ARG MAX_JOBS ARG OPENBLAS_VERSION=0.3.30 -RUN microdnf install -y dnf && dnf install -y gcc-toolset-13 make wget unzip \ - && source /opt/rh/gcc-toolset-13/enable \ +RUN microdnf install -y dnf && dnf install -y gcc-toolset-14 make wget unzip \ + && source /opt/rh/gcc-toolset-14/enable \ && wget https://github.com/OpenMathLib/OpenBLAS/releases/download/v$OPENBLAS_VERSION/OpenBLAS-$OPENBLAS_VERSION.zip \ && unzip OpenBLAS-$OPENBLAS_VERSION.zip \ && cd OpenBLAS-$OPENBLAS_VERSION \ @@ -57,7 +57,7 @@ COPY --from=openblas-builder /tmp/control /dev/null RUN --mount=type=bind,from=openblas-builder,source=/OpenBLAS-$OPENBLAS_VERSION/,target=/openblas/,rw \ dnf install -y openssl-devel \ && dnf install -y \ - git tar gcc-toolset-13 automake libtool \ + git tar gcc-toolset-14 automake libtool \ pkgconfig xsimd zeromq-devel kmod findutils protobuf* \ libtiff-devel libjpeg-devel zlib-devel freetype-devel libwebp-devel \ harfbuzz-devel libraqm-devel libimagequant-devel libxcb-devel \ @@ -84,7 +84,7 @@ ARG _GLIBCXX_USE_CXX11_ABI=1 ARG OPENBLAS_VERSION=0.3.30 RUN --mount=type=cache,target=/root/.cache/uv \ - source /opt/rh/gcc-toolset-13/enable && \ + source /opt/rh/gcc-toolset-14/enable && \ git clone --recursive https://github.com/pytorch/pytorch.git -b v${TORCH_VERSION} && \ cd pytorch && \ uv pip install -r requirements.txt && \ @@ -97,7 +97,7 @@ ARG TORCHVISION_VERSION=0.22.0 ARG TORCHVISION_USE_NVJPEG=0 ARG TORCHVISION_USE_FFMPEG=0 RUN --mount=type=cache,target=/root/.cache/uv \ - source /opt/rh/gcc-toolset-13/enable && \ + source /opt/rh/gcc-toolset-14/enable && \ git clone --recursive https://github.com/pytorch/vision.git -b v${TORCHVISION_VERSION} && \ cd vision && \ MAX_JOBS=${MAX_JOBS:-$(nproc)} \ @@ -113,7 +113,7 @@ ARG USE_ROCM=0 ARG USE_CUDA=0 ARG TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_FFMPEG=1 RUN --mount=type=cache,target=/root/.cache/uv \ - source /opt/rh/gcc-toolset-13/enable && \ + source /opt/rh/gcc-toolset-14/enable && \ git clone --recursive https://github.com/pytorch/audio.git -b v${TORCHAUDIO_VERSION} && \ cd audio && \ MAX_JOBS=${MAX_JOBS:-$(nproc)} \ @@ -130,7 +130,7 @@ ARG MAX_JOBS ARG PYARROW_PARALLEL ARG PYARROW_VERSION=21.0.0 RUN --mount=type=cache,target=/root/.cache/uv \ - source /opt/rh/gcc-toolset-13/enable && \ + source /opt/rh/gcc-toolset-14/enable && \ git clone --recursive https://github.com/apache/arrow.git -b apache-arrow-${PYARROW_VERSION} && \ cd arrow/cpp && \ mkdir build && cd build && \ @@ -162,7 +162,7 @@ ARG OPENCV_VERSION=86 ARG OPENCV_PATCH=97f3f39 ARG ENABLE_HEADLESS=1 RUN --mount=type=cache,target=/root/.cache/uv \ - source /opt/rh/gcc-toolset-13/enable && \ + source /opt/rh/gcc-toolset-14/enable && \ git clone --recursive https://github.com/opencv/opencv-python.git -b ${OPENCV_VERSION} && \ cd opencv-python && \ sed -i -E -e 's/"setuptools.+",/"setuptools",/g' pyproject.toml && \ @@ -196,7 +196,7 @@ ARG MAX_JOBS ARG NUMBA_VERSION=0.61.2 # Clone all required dependencies -RUN dnf install ninja-build llvm15 llvm15-devel -y && source /opt/rh/gcc-toolset-13/enable && export PATH=$PATH:/usr/lib64/llvm15/bin && \ +RUN dnf install ninja-build llvm15 llvm15-devel -y && source /opt/rh/gcc-toolset-14/enable && export PATH=$PATH:/usr/lib64/llvm15/bin && \ git clone --recursive https://github.com/numba/numba.git -b ${NUMBA_VERSION} && \ cd ./numba && \ if ! grep '#include "dynamic_annotations.h"' numba/_dispatcher.cpp; then \ @@ -211,6 +211,9 @@ RUN dnf install ninja-build llvm15 llvm15-devel -y && source /opt/rh/gcc-toolset FROM base-builder AS vllmcache-builder +ENV LLVM_CONFIG=/usr/lib64/llvm15/bin/llvm-config +ENV PATH=/usr/lib64/llvm15/bin:$PATH + COPY --from=torch-builder /tmp/control /dev/null COPY --from=arrow-builder /tmp/control /dev/null COPY --from=cv-builder /tmp/control /dev/null @@ -225,10 +228,13 @@ ARG GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1 RUN --mount=type=cache,target=/root/.cache/uv \ dnf install llvm15 llvm15-devel -y && \ rpm -ivh --nodeps https://mirror.stream.centos.org/9-stream/CRB/ppc64le/os/Packages/protobuf-lite-devel-3.14.0-16.el9.ppc64le.rpm && \ - source /opt/rh/gcc-toolset-13/enable && \ + source /opt/rh/gcc-toolset-14/enable && \ git clone https://github.com/huggingface/xet-core.git && cd xet-core/hf_xet/ && \ uv pip install maturin && \ uv build --wheel --out-dir /hf_wheels/ + +ENV CXXFLAGS="-fno-lto -Wno-error=free-nonheap-object" \ + CFLAGS="-fno-lto" RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,from=torch-builder,source=/torchwheels/,target=/torchwheels/,ro \ --mount=type=bind,from=arrow-builder,source=/arrowwheels/,target=/arrowwheels/,ro \ @@ -236,7 +242,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,from=numa-builder,source=/numactl/,target=/numactl/,rw \ --mount=type=bind,from=numba-builder,source=/numbawheels/,target=/numbawheels/,ro \ --mount=type=bind,src=.,dst=/src/,rw \ - source /opt/rh/gcc-toolset-13/enable && \ + source /opt/rh/gcc-toolset-14/enable && \ export PATH=$PATH:/usr/lib64/llvm15/bin && \ uv pip install /opencvwheels/*.whl /arrowwheels/*.whl /torchwheels/*.whl /numbawheels/*.whl && \ sed -i -e 's/.*torch.*//g' /src/pyproject.toml /src/requirements/*.txt && \ @@ -260,7 +266,7 @@ FROM base-builder AS lapack-builder ARG MAX_JOBS ARG LAPACK_VERSION=3.12.1 RUN git clone --recursive https://github.com/Reference-LAPACK/lapack.git -b v${LAPACK_VERSION} \ - && cd lapack && source /opt/rh/gcc-toolset-13/enable \ + && cd lapack && source /opt/rh/gcc-toolset-14/enable \ && cmake -B build -S . \ && cmake --build build -j ${MAX_JOBS:-$(nproc)} @@ -299,7 +305,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,from=openblas-builder,source=/OpenBLAS-$OPENBLAS_VERSION/,target=/openblas/,rw \ rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \ microdnf install --nodocs -y \ - libomp tar findutils openssl llvm15 llvm15-devel \ + libomp libicu tar findutils openssl llvm15 llvm15-devel \ pkgconfig xsimd g++ gcc-fortran libsndfile \ libtiff libjpeg openjpeg2 zlib zeromq \ freetype lcms2 libwebp tcl tk utf8proc \ diff --git a/requirements/common.txt b/requirements/common.txt index f2d1c0762ef6..3f8cd588422d 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -19,12 +19,12 @@ pillow # Required for image processing prometheus-fastapi-instrumentator >= 7.0.0 tiktoken >= 0.6.0 # Required for DBRX tokenizer lm-format-enforcer == 0.11.3 -llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "s390x" +llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "s390x" or platform_machine == "ppc64le" outlines_core == 0.2.11 # required for outlines backend disk cache diskcache == 5.6.3 lark == 1.2.2 -xgrammar == 0.1.27; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" +xgrammar == 0.1.27; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le" typing_extensions >= 4.10 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 partial-json-parser # used for parsing partial JSON outputs