# Base UBI image for s390x architecture
#
# Multi-stage build: each third-party component (pyarrow, numactl, rust,
# torchvision, hf-xet, llvmlite/numba, outlines-core) is built in its own
# stage, and the final `vllm-cpu` stage bind-mounts the resulting wheels /
# build trees instead of copying the build toolchains into the image.
ARG BASE_UBI_IMAGE_TAG=9.5-1736404155
ARG PYTHON_VERSION=3.12
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base

# Install basic dependencies
# (ARG declared before FROM is only visible in FROM lines, so redeclare it.)
ARG PYTHON_VERSION
ENV PYTHON_VERSION=${PYTHON_VERSION}

WORKDIR /workspace

ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

# Install development utilities
RUN microdnf install -y \
        which procps findutils tar vim git gcc-toolset-14 gcc-toolset-14-libatomic-devel patch zlib-devel \
        libjpeg-turbo-devel libtiff-devel libpng-devel libwebp-devel freetype-devel harfbuzz-devel \
        openssl-devel openblas openblas-devel autoconf automake libtool cmake numpy libsndfile \
        clang llvm-devel llvm-static clang-devel && \
    microdnf clean all


# Python Installation
FROM base AS python-install
ARG PYTHON_VERSION

# All later stages derive from this one and therefore install into this venv.
ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
ENV PYTHON_VERSION=${PYTHON_VERSION}
RUN microdnf install -y \
        python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && \
    pip install --no-cache-dir -U pip wheel uv && \
    microdnf clean all


FROM python-install AS pyarrow

# Build Apache Arrow
# The C++ library is installed under /usr/local, then the pyarrow wheel is
# produced in /tmp/arrow/python/dist (consumed by the final stage).
# NOTE(review): the clone is not pinned to a tag or commit.
WORKDIR /tmp
RUN --mount=type=cache,target=/root/.cache/uv \
    git clone https://github.com/apache/arrow.git && \
    cd arrow/cpp && \
    mkdir release && cd release && \
    cmake -DCMAKE_BUILD_TYPE=Release \
          -DCMAKE_INSTALL_PREFIX=/usr/local \
          -DARROW_PYTHON=ON \
          -DARROW_PARQUET=ON \
          -DARROW_ORC=ON \
          -DARROW_FILESYSTEM=ON \
          -DARROW_WITH_LZ4=ON \
          -DARROW_WITH_ZSTD=ON \
          -DARROW_WITH_SNAPPY=ON \
          -DARROW_JSON=ON \
          -DARROW_CSV=ON \
          -DARROW_DATASET=ON \
          -DPROTOBUF_PROTOC_EXECUTABLE=/usr/bin/protoc \
          -DARROW_DEPENDENCY_SOURCE=BUNDLED \
          .. && \
    make -j$(nproc) && \
    make install && \
    cd ../../python && \
    export PYARROW_PARALLEL=4 && \
    export ARROW_BUILD_TYPE=release && \
    uv pip install -r requirements-build.txt && \
    python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --bundle-arrow-cpp bdist_wheel


FROM python-install AS numa-build
# Install numactl (needed for numa.h dependency)
# Only configured and built here; `make install` is run in the final stage
# against a bind mount of /tmp/numactl-2.0.16.
WORKDIR /tmp
# -f: fail on an HTTP error instead of saving the error page as the tarball.
RUN curl -fLO https://github.com/numactl/numactl/archive/refs/tags/v2.0.16.tar.gz && \
    tar -xvzf v2.0.16.tar.gz && \
    cd numactl-2.0.16 && \
    ./autogen.sh && \
    ./configure && \
    make

# Set include path
# ${VAR:+:...} appends the previous value only when one exists, so the path
# never ends in an empty entry (which GCC treats as the current directory).
ENV C_INCLUDE_PATH="/usr/local/include${C_INCLUDE_PATH:+:${C_INCLUDE_PATH}}"


FROM python-install AS rust
# Rust toolchain; /root/.cargo and /root/.rustup are bind-mounted into the
# stages that need cargo.
ENV CARGO_HOME=/root/.cargo
ENV RUSTUP_HOME=/root/.rustup
ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"

# Download the installer to a file before running it: with `curl | sh` under
# the default /bin/sh (no pipefail) a failed download would be masked by the
# exit status of `sh`.
RUN curl -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh && \
    sh /tmp/rustup-init.sh -y && \
    rm -f /tmp/rustup-init.sh && \
    . "$CARGO_HOME/env" && \
    rustup default stable && \
    rustup show


FROM python-install AS torch-vision
# Install torchvision
# Builds a wheel into /tmp/vision/dist against the CPU build of torch.
ARG TORCH_VISION_VERSION=v0.23.0
WORKDIR /tmp
RUN --mount=type=cache,target=/root/.cache/uv \
    git clone https://github.com/pytorch/vision.git && \
    cd vision && \
    git checkout $TORCH_VISION_VERSION && \
    uv pip install torch==2.8.0 --index-url https://download.pytorch.org/whl/cpu && \
    python setup.py bdist_wheel


FROM python-install AS hf-xet-builder
# Install hf-xet
# Wheel is built with maturin and copied to /tmp/hf-xet/dist.
# NOTE(review): the clone is not pinned to a tag or commit.
WORKDIR /tmp
ENV CARGO_HOME=/root/.cargo
ENV RUSTUP_HOME=/root/.rustup
ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=rust,source=/root/.cargo,target=/root/.cargo,rw \
    --mount=type=bind,from=rust,source=/root/.rustup,target=/root/.rustup,rw \
    git clone https://github.com/huggingface/xet-core.git && \
    cd xet-core/hf_xet/ && \
    uv pip install maturin patchelf && \
    python -m maturin build --release --out dist && \
    mkdir -p /tmp/hf-xet/dist && \
    cp dist/*.whl /tmp/hf-xet/dist/


# Build numba
# Builds LLVM 15.0.7 from source and installs it under /usr/local, then
# builds the llvmlite and numba wheels (/tmp/llvmlite/dist, /tmp/numba/dist).
FROM python-install AS numba-builder

ARG MAX_JOBS
ARG NUMBA_VERSION=0.61.2

WORKDIR /tmp

# NOTE(review): in the RUN below, the CMAKE_ARGS=... words placed in front of
# `cmake` are command-prefix assignments. The shell expands the ${CMAKE_ARGS}
# at the end of the cmake command line before those assignments take effect,
# so -DLLVM_ENABLE_PROJECTS and the -DFFI_* flags do not appear to reach
# cmake's argument list. Left unchanged here; confirm the intent before
# altering what LLVM builds.

# Clone all required dependencies
RUN --mount=type=cache,target=/root/.cache/uv \
    microdnf install ninja-build gcc gcc-c++ -y && \
    git clone --recursive https://github.com/llvm/llvm-project.git -b llvmorg-15.0.7 && \
    git clone --recursive https://github.com/numba/llvmlite.git -b v0.44.0 && \
    git clone --recursive https://github.com/numba/numba.git -b ${NUMBA_VERSION} && \
    cd llvm-project && mkdir build && cd build && \
    uv pip install 'cmake<4' setuptools numpy && \
    export PREFIX=/usr/local && CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_ENABLE_PROJECTS=lld;libunwind;compiler-rt" \
    CFLAGS="$(echo $CFLAGS | sed 's/-fno-plt //g')" \
    CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fno-plt //g')" \
    CMAKE_ARGS="${CMAKE_ARGS} -DFFI_INCLUDE_DIR=$PREFIX/include" \
    CMAKE_ARGS="${CMAKE_ARGS} -DFFI_LIBRARY_DIR=$PREFIX/lib" \
    cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_LIBRARY_PATH="${PREFIX}" \
    -DLLVM_ENABLE_LIBEDIT=OFF \
    -DLLVM_ENABLE_LIBXML2=OFF \
    -DLLVM_ENABLE_RTTI=ON \
    -DLLVM_ENABLE_TERMINFO=OFF \
    -DLLVM_INCLUDE_BENCHMARKS=OFF \
    -DLLVM_INCLUDE_DOCS=OFF \
    -DLLVM_INCLUDE_EXAMPLES=OFF \
    -DLLVM_INCLUDE_GO_TESTS=OFF \
    -DLLVM_INCLUDE_TESTS=OFF \
    -DLLVM_INCLUDE_UTILS=ON \
    -DLLVM_INSTALL_UTILS=ON \
    -DLLVM_UTILS_INSTALL_DIR=libexec/llvm \
    -DLLVM_BUILD_LLVM_DYLIB=OFF \
    -DLLVM_LINK_LLVM_DYLIB=OFF \
    -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly \
    -DLLVM_ENABLE_FFI=ON \
    -DLLVM_ENABLE_Z3_SOLVER=OFF \
    -DLLVM_OPTIMIZED_TABLEGEN=ON \
    -DCMAKE_POLICY_DEFAULT_CMP0111=NEW \
    -DCOMPILER_RT_BUILD_BUILTINS=ON \
    -DCOMPILER_RT_BUILTINS_HIDE_SYMBOLS=OFF \
    -DCOMPILER_RT_BUILD_LIBFUZZER=OFF \
    -DCOMPILER_RT_BUILD_CRT=OFF \
    -DCOMPILER_RT_BUILD_MEMPROF=OFF \
    -DCOMPILER_RT_BUILD_PROFILE=OFF \
    -DCOMPILER_RT_BUILD_SANITIZERS=OFF \
    -DCOMPILER_RT_BUILD_XRAY=OFF \
    -DCOMPILER_RT_BUILD_GWP_ASAN=OFF \
    -DCOMPILER_RT_BUILD_ORC=OFF \
    -DCOMPILER_RT_INCLUDE_TESTS=OFF \
    ${CMAKE_ARGS} -GNinja ../llvm \
    && ninja install . && \
    # build llvmlite
    cd ../../llvmlite && python setup.py bdist_wheel && \
    # build numba; insert the dynamic_annotations.h include ahead of
    # pycore_atomic.h in _dispatcher.cpp unless it is already present
    cd ../numba && \
    if ! grep '#include "dynamic_annotations.h"' numba/_dispatcher.cpp; then \
        sed -i '/#include "internal\/pycore_atomic.h"/i\#include "dynamic_annotations.h"' numba/_dispatcher.cpp; \
    fi && python setup.py bdist_wheel


# Build Outlines Core
# The version comes from the OUTLINES_CORE_VERSION build arg when given,
# otherwise from the `outlines_core ==` pin in requirements/common.txt.
FROM python-install AS outlines-core-builder
WORKDIR /tmp

ENV CARGO_HOME=/root/.cargo
ENV RUSTUP_HOME=/root/.rustup
ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"

COPY requirements/common.txt /tmp/requirements/common.txt
ARG OUTLINES_CORE_VERSION
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=rust,source=/root/.cargo,target=/root/.cargo,rw \
    --mount=type=bind,from=rust,source=/root/.rustup,target=/root/.rustup,rw \
    OUTLINES_CORE_VERSION=${OUTLINES_CORE_VERSION:-$(grep -E '^outlines_core\s*==\s*[0-9.]+' /tmp/requirements/common.txt | grep -Eo '[0-9.]+')} && \
    if [ -z "${OUTLINES_CORE_VERSION}" ]; then echo "ERROR: Could not determine outlines_core version"; exit 1; fi && \
    git clone https://github.com/dottxt-ai/outlines-core.git && \
    cd outlines-core && \
    git checkout tags/${OUTLINES_CORE_VERSION} && \
    # Cargo.toml carries a placeholder version; stamp in the real one
    sed -i "s/version = \"0.0.0\"/version = \"${OUTLINES_CORE_VERSION}\"/" Cargo.toml && \
    uv pip install maturin && \
    python -m maturin build --release --out dist


# Final build stage
FROM python-install AS vllm-cpu
ARG PYTHON_VERSION

# Set correct library path for torch and numactl
# ${VAR:+:...} appends the previous value only when one exists. A plain
# ":$VAR" suffix leaves a trailing empty entry when the variable is unset,
# and the dynamic linker / GCC treat an empty entry as the current directory.
ENV LD_LIBRARY_PATH="/opt/vllm/lib64/python${PYTHON_VERSION}/site-packages/torch/lib:/usr/local/lib:/opt/rh/gcc-toolset-14/root/usr/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
ENV C_INCLUDE_PATH="/usr/local/include${C_INCLUDE_PATH:+:${C_INCLUDE_PATH}}"
ENV UV_LINK_MODE=copy
ENV CARGO_HOME=/root/.cargo
ENV RUSTUP_HOME=/root/.rustup
ENV GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1
ENV PCP_DIR=/opt/rh/gcc-toolset-14/root
ENV PKG_CONFIG_PATH="/opt/rh/gcc-toolset-14/root/usr/lib64/pkgconfig:/usr/local/lib/pkgconfig/"
ENV PATH="${VIRTUAL_ENV:+${VIRTUAL_ENV}/bin}:/opt/rh/gcc-toolset-14/root/usr/bin:/usr/local/bin:$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"

# Install numactl from the tree built in the numa-build stage. This does not
# depend on the vLLM sources, so it runs before `COPY .` and its layer stays
# cached when only the source tree changes.
RUN --mount=type=bind,from=numa-build,src=/tmp/numactl-2.0.16,target=/numactl \
    make -C /numactl install

COPY . /workspace/vllm
WORKDIR /workspace/vllm

# Install dependencies, including PyTorch and Apache Arrow
# Wheels produced by the builder stages are bind-mounted and installed
# together with requirements/build.txt and requirements/cpu.txt. Lines
# starting with "torch" are first deleted from requirements/build.txt.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=rust,source=/root/.cargo,target=/root/.cargo,rw \
    --mount=type=bind,from=rust,source=/root/.rustup,target=/root/.rustup,rw \
    --mount=type=bind,from=pyarrow,source=/tmp/arrow/python/dist,target=/tmp/arrow-wheels \
    --mount=type=bind,from=torch-vision,source=/tmp/vision/dist,target=/tmp/vision-wheels/ \
    --mount=type=bind,from=hf-xet-builder,source=/tmp/hf-xet/dist,target=/tmp/hf-xet-wheels/ \
    --mount=type=bind,from=numba-builder,source=/tmp/llvmlite/dist,target=/tmp/llvmlite-wheels/ \
    --mount=type=bind,from=numba-builder,source=/tmp/numba/dist,target=/tmp/numba-wheels/ \
    --mount=type=bind,from=outlines-core-builder,source=/tmp/outlines-core/dist,target=/tmp/outlines-core/dist/ \
    sed -i '/^torch/d' requirements/build.txt && \
    ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/pyarrow-*.whl) && \
    VISION_WHL_FILE=$(ls /tmp/vision-wheels/*.whl) && \
    HF_XET_WHL_FILE=$(ls /tmp/hf-xet-wheels/*.whl) && \
    LLVM_WHL_FILE=$(ls /tmp/llvmlite-wheels/*.whl) && \
    NUMBA_WHL_FILE=$(ls /tmp/numba-wheels/*.whl) && \
    OUTLINES_CORE_WHL_FILE=$(ls /tmp/outlines-core/dist/*.whl) && \
    uv pip install -v \
        $ARROW_WHL_FILE \
        $VISION_WHL_FILE \
        $HF_XET_WHL_FILE \
        $LLVM_WHL_FILE \
        $NUMBA_WHL_FILE \
        $OUTLINES_CORE_WHL_FILE \
        --index-strategy unsafe-best-match \
        -r requirements/build.txt \
        -r requirements/cpu.txt

# Build and install vllm
RUN --mount=type=cache,target=/root/.cache/uv \
    VLLM_TARGET_DEVICE=cpu VLLM_CPU_MOE_PREPACK=0 python setup.py bdist_wheel && \
    uv pip install "$(echo dist/*.whl)[tensorizer]"

# setup non-root user for vllm
# UID 2000 in the root group (GID 0); the home directory is group-writable.
RUN umask 002 && \
    useradd --uid 2000 --gid 0 vllm && \
    mkdir -p /home/vllm && \
    chmod g+rwx /home/vllm

COPY LICENSE /licenses/vllm.md
COPY examples/*.jinja /app/data/template/

USER 2000
WORKDIR /home/vllm

# Set the default entrypoint
ENTRYPOINT ["vllm", "serve"]