diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index f164857325043..ff8ad1607e34a 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -47,6 +47,7 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/requirements /requirements COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/benchmarks /benchmarks COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/tests /tests COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/examples /examples +COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite # ----------------------- @@ -71,7 +72,7 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm /vllm-workspace RUN cd /vllm-workspace \ && rm -rf vllm \ && python3 -m pip install -e tests/vllm_test_utils \ - && python3 -m pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d#egg=lm-eval[api] \ + && python3 -m pip install lm-eval[api]==0.4.4 \ && python3 -m pip install pytest-shard # ----------------------- @@ -100,6 +101,7 @@ ARG COMMON_WORKDIR # Copy over the benchmark scripts as well COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples +COPY --from=export_vllm /docker ${COMMON_WORKDIR}/vllm/docker ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 ENV TOKENIZERS_PARALLELISM=false diff --git a/docker/Dockerfile.rocm_base b/docker/Dockerfile.rocm_base index 3414c0aa845cb..2ba5461dfe551 100644 --- a/docker/Dockerfile.rocm_base +++ b/docker/Dockerfile.rocm_base @@ -1,18 +1,16 @@ -ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:6.3.1-complete -ARG HIPBLASLT_BRANCH="db8e93b4" -ARG HIPBLAS_COMMON_BRANCH="7c1566b" +ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:6.4.1-complete +ARG HIPBLASLT_BRANCH="aa0bda7b" +ARG HIPBLAS_COMMON_BRANCH="9b80ba8e" ARG LEGACY_HIPBLASLT_OPTION= -ARG RCCL_BRANCH="648a58d" -ARG RCCL_REPO="https://github.com/ROCm/rccl" ARG TRITON_BRANCH="e5be006" ARG TRITON_REPO="https://github.com/triton-lang/triton.git" -ARG PYTORCH_BRANCH="295f2ed4" +ARG PYTORCH_BRANCH="f717b2af" ARG PYTORCH_VISION_BRANCH="v0.21.0" -ARG PYTORCH_REPO="https://github.com/pytorch/pytorch.git" +ARG PYTORCH_REPO="https://github.com/ROCm/pytorch.git" ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git" ARG FA_BRANCH="1a7f4dfa" ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git" -ARG AITER_BRANCH="916bf3c" +ARG AITER_BRANCH="4822e675" ARG AITER_REPO="https://github.com/ROCm/aiter.git" FROM ${BASE_IMAGE} AS base @@ -45,7 +43,7 @@ RUN apt-get update -y \ && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \ && python3 --version && python3 -m pip --version -RUN pip install -U packaging 'cmake<4' ninja wheel setuptools pybind11 Cython +RUN pip install -U packaging 'cmake<4' ninja wheel 'setuptools<80' pybind11 Cython FROM base AS build_hipblaslt ARG HIPBLASLT_BRANCH @@ -53,6 +51,7 @@ ARG HIPBLAS_COMMON_BRANCH # Set to "--legacy_hipblas_direct" for ROCm<=6.2 ARG LEGACY_HIPBLASLT_OPTION RUN git clone https://github.com/ROCm/hipBLAS-common.git +RUN apt-get remove -y hipblaslt && apt-get autoremove -y && apt-get autoclean -y RUN cd hipBLAS-common \ && git checkout ${HIPBLAS_COMMON_BRANCH} \ && mkdir build \ @@ -69,24 +68,17 @@ RUN cd hipBLASLt \ && make package RUN mkdir -p /app/install && cp /app/hipBLASLt/build/release/*.deb /app/hipBLAS-common/build/*.deb /app/install -FROM base AS build_rccl -ARG RCCL_BRANCH -ARG RCCL_REPO -RUN git clone ${RCCL_REPO} -RUN cd rccl \ - && git checkout ${RCCL_BRANCH} \ - && ./install.sh -p --amdgpu_targets ${PYTORCH_ROCM_ARCH} -RUN mkdir -p /app/install && cp /app/rccl/build/release/*.deb /app/install - FROM base AS build_triton ARG TRITON_BRANCH ARG TRITON_REPO RUN git clone ${TRITON_REPO} RUN cd triton \ && git checkout ${TRITON_BRANCH} \ - && cd python \ - && python3 setup.py bdist_wheel --dist-dir=dist -RUN mkdir -p /app/install && cp /app/triton/python/dist/*.whl /app/install + && if [ ! -f setup.py ]; then cd python; fi \ + && python3 setup.py bdist_wheel --dist-dir=dist \ + && mkdir -p /app/install && cp dist/*.whl /app/install +RUN if [ -d triton/python/triton_kernels ]; then pip install build && cd triton/python/triton_kernels \ + && python3 -m build --wheel && cp dist/*.whl /app/install; fi FROM base AS build_amdsmi RUN cd /opt/rocm/share/amd_smi \ @@ -132,15 +124,25 @@ RUN cd aiter \ RUN pip install pyyaml && cd aiter && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py bdist_wheel --dist-dir=dist && ls /app/aiter/dist/*.whl RUN mkdir -p /app/install && cp /app/aiter/dist/*.whl /app/install +FROM base AS debs +RUN mkdir /app/debs +RUN --mount=type=bind,from=build_hipblaslt,src=/app/install/,target=/install \ + cp /install/*.deb /app/debs +RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \ + cp /install/*.whl /app/debs +RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \ + cp /install/*.whl /app/debs +RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \ + cp /install/*.whl /app/debs +RUN --mount=type=bind,from=build_aiter,src=/app/install/,target=/install \ + cp /install/*.whl /app/debs + FROM base AS final RUN --mount=type=bind,from=build_hipblaslt,src=/app/install/,target=/install \ dpkg -i /install/*deb \ - && sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status \ - && sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status -RUN --mount=type=bind,from=build_rccl,src=/app/install/,target=/install \ - dpkg -i /install/*deb \ - && sed -i 's/, rccl-dev \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status \ - && sed -i 's/, rccl \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status + && perl -p -i -e 's/, hipblas-common-dev \([^)]*?\), /, /g' /var/lib/dpkg/status \ + && perl -p -i -e 's/, hipblaslt-dev \([^)]*?\), /, /g' /var/lib/dpkg/status \ + && perl -p -i -e 's/, hipblaslt \([^)]*?\), /, /g' /var/lib/dpkg/status RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \ pip install /install/*.whl RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \ @@ -154,8 +156,6 @@ ARG BASE_IMAGE ARG HIPBLAS_COMMON_BRANCH ARG HIPBLASLT_BRANCH ARG LEGACY_HIPBLASLT_OPTION -ARG RCCL_BRANCH -ARG RCCL_REPO ARG TRITON_BRANCH ARG TRITON_REPO ARG PYTORCH_BRANCH @@ -170,8 +170,6 @@ RUN echo "BASE_IMAGE: ${BASE_IMAGE}" > /app/versions.txt \ && echo "HIPBLAS_COMMON_BRANCH: ${HIPBLAS_COMMON_BRANCH}" >> /app/versions.txt \ && echo "HIPBLASLT_BRANCH: ${HIPBLASLT_BRANCH}" >> /app/versions.txt \ && echo "LEGACY_HIPBLASLT_OPTION: ${LEGACY_HIPBLASLT_OPTION}" >> /app/versions.txt \ - && echo "RCCL_BRANCH: ${RCCL_BRANCH}" >> /app/versions.txt \ - && echo "RCCL_REPO: ${RCCL_REPO}" >> /app/versions.txt \ && echo "TRITON_BRANCH: ${TRITON_BRANCH}" >> /app/versions.txt \ && echo "TRITON_REPO: ${TRITON_REPO}" >> /app/versions.txt \ && echo "PYTORCH_BRANCH: ${PYTORCH_BRANCH}" >> /app/versions.txt \ @@ -180,4 +178,4 @@ RUN echo "BASE_IMAGE: ${BASE_IMAGE}" > /app/versions.txt \ && echo "PYTORCH_VISION_REPO: ${PYTORCH_VISION_REPO}" >> /app/versions.txt \ && echo "FA_BRANCH: ${FA_BRANCH}" >> /app/versions.txt \ && echo "AITER_BRANCH: ${AITER_BRANCH}" >> /app/versions.txt \ - && echo "AITER_REPO: ${AITER_REPO}" >> /app/versions.txt + && echo "AITER_REPO: ${AITER_REPO}" >> /app/versions.txt \ No newline at end of file