mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 07:15:01 +08:00
[ROCm] Cleanup Dockerfile and remove outdated patch (#6482)
This commit is contained in:
parent
1d094fd7c0
commit
10383887e0
@ -1,11 +1,6 @@
|
|||||||
# Default ROCm 6.1 base image
|
# Default ROCm 6.1 base image
|
||||||
ARG BASE_IMAGE="rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging"
|
ARG BASE_IMAGE="rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging"
|
||||||
|
|
||||||
# Tested and supported base rocm/pytorch images
|
|
||||||
ARG ROCm_5_7_BASE="rocm/pytorch:rocm5.7_ubuntu20.04_py3.9_pytorch_2.0.1" \
|
|
||||||
ROCm_6_0_BASE="rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1" \
|
|
||||||
ROCM_6_1_BASE="rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging"
|
|
||||||
|
|
||||||
# Default ROCm ARCHes to build vLLM for.
|
# Default ROCm ARCHes to build vLLM for.
|
||||||
ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
|
ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
|
||||||
|
|
||||||
@ -54,18 +49,6 @@ RUN pip install --upgrade pip
|
|||||||
RUN apt-get purge -y sccache; pip uninstall -y sccache; rm -f "$(which sccache)"
|
RUN apt-get purge -y sccache; pip uninstall -y sccache; rm -f "$(which sccache)"
|
||||||
# Install torch == 2.5.0 on ROCm
|
# Install torch == 2.5.0 on ROCm
|
||||||
RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
|
RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
|
||||||
*"rocm-5.7"*) \
|
|
||||||
pip uninstall -y torch torchaudio torchvision \
|
|
||||||
&& pip install --no-cache-dir --pre \
|
|
||||||
torch==2.5.0.dev20240710 torchaudio==2.4.0.dev20240710 \
|
|
||||||
torchvision==0.20.0.dev20240710 \
|
|
||||||
--index-url https://download.pytorch.org/whl/nightly/rocm5.7;; \
|
|
||||||
*"rocm-6.0"*) \
|
|
||||||
pip uninstall -y torch torchaudio torchvision \
|
|
||||||
&& pip install --no-cache-dir --pre \
|
|
||||||
torch==2.5.0.dev20240710 torchaudio==2.4.0.dev20240710 \
|
|
||||||
torchvision==0.20.0.dev20240710 \
|
|
||||||
--index-url https://download.pytorch.org/whl/nightly/rocm6.0;; \
|
|
||||||
*"rocm-6.1"*) \
|
*"rocm-6.1"*) \
|
||||||
pip uninstall -y torch torchaudio torchvision \
|
pip uninstall -y torch torchaudio torchvision \
|
||||||
&& pip install --no-cache-dir --pre \
|
&& pip install --no-cache-dir --pre \
|
||||||
@ -104,11 +87,6 @@ RUN --mount=type=cache,target=${CCACHE_DIR} \
|
|||||||
&& cd flash-attention \
|
&& cd flash-attention \
|
||||||
&& git checkout "${FA_BRANCH}" \
|
&& git checkout "${FA_BRANCH}" \
|
||||||
&& git submodule update --init \
|
&& git submodule update --init \
|
||||||
&& case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
|
|
||||||
*"rocm-5.7"*) \
|
|
||||||
export VLLM_TORCH_PATH="$(python3 -c 'import torch; print(torch.__path__[0])')" \
|
|
||||||
&& patch "${VLLM_TORCH_PATH}"/utils/hipify/hipify_python.py hipify_patch.patch;; \
|
|
||||||
*) ;; esac \
|
|
||||||
&& GPU_ARCHS="${FA_GFX_ARCHS}" python3 setup.py bdist_wheel --dist-dir=/install; \
|
&& GPU_ARCHS="${FA_GFX_ARCHS}" python3 setup.py bdist_wheel --dist-dir=/install; \
|
||||||
# Create an empty directory otherwise as later build stages expect one
|
# Create an empty directory otherwise as later build stages expect one
|
||||||
else mkdir -p /install; \
|
else mkdir -p /install; \
|
||||||
@ -161,12 +139,9 @@ RUN --mount=type=cache,target=${CCACHE_DIR} \
|
|||||||
--mount=type=cache,target=/root/.cache/pip \
|
--mount=type=cache,target=/root/.cache/pip \
|
||||||
pip install -U -r requirements-rocm.txt \
|
pip install -U -r requirements-rocm.txt \
|
||||||
&& case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
|
&& case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
|
||||||
*"rocm-6.0"*) \
|
|
||||||
patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h rocm_patch/rocm_bf16.patch;; \
|
|
||||||
*"rocm-6.1"*) \
|
*"rocm-6.1"*) \
|
||||||
# Bring in upgrades to HIP graph earlier than ROCm 6.2 for vLLM
|
# Bring in upgrades to HIP graph earlier than ROCm 6.2 for vLLM
|
||||||
wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P rocm_patch \
|
wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P /opt/rocm/lib \
|
||||||
&& cp rocm_patch/libamdhip64.so.6 /opt/rocm/lib/libamdhip64.so.6 \
|
|
||||||
# Prevent interference if torch bundles its own HIP runtime
|
# Prevent interference if torch bundles its own HIP runtime
|
||||||
&& rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so* || true;; \
|
&& rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so* || true;; \
|
||||||
*) ;; esac \
|
*) ;; esac \
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
Installation with ROCm
|
Installation with ROCm
|
||||||
======================
|
======================
|
||||||
|
|
||||||
vLLM supports AMD GPUs with ROCm 5.7 and 6.0.
|
vLLM supports AMD GPUs with ROCm 6.1.
|
||||||
|
|
||||||
Requirements
|
Requirements
|
||||||
------------
|
------------
|
||||||
@ -11,7 +11,7 @@ Requirements
|
|||||||
* OS: Linux
|
* OS: Linux
|
||||||
* Python: 3.8 -- 3.11
|
* Python: 3.8 -- 3.11
|
||||||
* GPU: MI200s (gfx90a), MI300 (gfx942), Radeon RX 7900 series (gfx1100)
|
* GPU: MI200s (gfx90a), MI300 (gfx942), Radeon RX 7900 series (gfx1100)
|
||||||
* ROCm 6.0 and ROCm 5.7
|
* ROCm 6.1
|
||||||
|
|
||||||
Installation options:
|
Installation options:
|
||||||
|
|
||||||
@ -27,10 +27,10 @@ You can build and install vLLM from source.
|
|||||||
|
|
||||||
First, build a docker image from `Dockerfile.rocm <https://github.com/vllm-project/vllm/blob/main/Dockerfile.rocm>`_ and launch a docker container from the image.
|
First, build a docker image from `Dockerfile.rocm <https://github.com/vllm-project/vllm/blob/main/Dockerfile.rocm>`_ and launch a docker container from the image.
|
||||||
|
|
||||||
`Dockerfile.rocm <https://github.com/vllm-project/vllm/blob/main/Dockerfile.rocm>`_ uses ROCm 6.0 by default, but also supports ROCm 5.7.
|
`Dockerfile.rocm <https://github.com/vllm-project/vllm/blob/main/Dockerfile.rocm>`_ uses ROCm 6.1 by default. ROCm 5.7 and 6.0 are supported only in older vLLM branches.
|
||||||
It provides flexibility to customize the build of docker image using the following arguments:
|
It provides flexibility to customize the build of docker image using the following arguments:
|
||||||
|
|
||||||
* `BASE_IMAGE`: specifies the base image used when running ``docker build``, specifically the PyTorch on ROCm base image. We have tested ROCm 5.7 and ROCm 6.0. The default is `rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1`
|
* `BASE_IMAGE`: specifies the base image used when running ``docker build``, specifically the PyTorch on ROCm base image.
|
||||||
* `BUILD_FA`: specifies whether to build CK flash-attention. The default is 1. For `Radeon RX 7900 series (gfx1100) <https://rocm.docs.amd.com/projects/radeon/en/latest/index.html>`_, this should be set to 0 before flash-attention supports this target.
|
* `BUILD_FA`: specifies whether to build CK flash-attention. The default is 1. For `Radeon RX 7900 series (gfx1100) <https://rocm.docs.amd.com/projects/radeon/en/latest/index.html>`_, this should be set to 0 before flash-attention supports this target.
|
||||||
* `FA_GFX_ARCHS`: specifies the GFX architecture that is used to build CK flash-attention, for example, `gfx90a;gfx942` for MI200 and MI300. The default is `gfx90a;gfx942`
|
* `FA_GFX_ARCHS`: specifies the GFX architecture that is used to build CK flash-attention, for example, `gfx90a;gfx942` for MI200 and MI300. The default is `gfx90a;gfx942`
|
||||||
* `FA_BRANCH`: specifies the branch used to build the CK flash-attention in `ROCm's flash-attention repo <https://github.com/ROCmSoftwarePlatform/flash-attention>`_. The default is `ae7928c`
|
* `FA_BRANCH`: specifies the branch used to build the CK flash-attention in `ROCm's flash-attention repo <https://github.com/ROCmSoftwarePlatform/flash-attention>`_. The default is `ae7928c`
|
||||||
@ -39,24 +39,17 @@ It provides flexibility to customize the build of docker image using the followi
|
|||||||
Their values can be passed in when running ``docker build`` with ``--build-arg`` options.
|
Their values can be passed in when running ``docker build`` with ``--build-arg`` options.
|
||||||
|
|
||||||
|
|
||||||
To build vllm on ROCm 6.0 for MI200 and MI300 series, you can use the default:
|
To build vllm on ROCm 6.1 for MI200 and MI300 series, you can use the default:
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
$ docker build -f Dockerfile.rocm -t vllm-rocm .
|
$ DOCKER_BUILDKIT=1 docker build -f Dockerfile.rocm -t vllm-rocm .
|
||||||
|
|
||||||
To build vllm on ROCm 6.0 for Radeon RX7900 series (gfx1100), you should specify ``BUILD_FA`` as below:
|
To build vllm on ROCm 6.1 for Radeon RX7900 series (gfx1100), you should specify ``BUILD_FA`` as below:
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
$ docker build --build-arg BUILD_FA="0" -f Dockerfile.rocm -t vllm-rocm .
|
$ DOCKER_BUILDKIT=1 docker build --build-arg BUILD_FA="0" -f Dockerfile.rocm -t vllm-rocm .
|
||||||
|
|
||||||
To build docker image for vllm on ROCm 5.7, you can specify ``BASE_IMAGE`` as below:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ docker build --build-arg BASE_IMAGE="rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1" \
|
|
||||||
-f Dockerfile.rocm -t vllm-rocm .
|
|
||||||
|
|
||||||
To run the above docker image ``vllm-rocm``, use the below command:
|
To run the above docker image ``vllm-rocm``, use the below command:
|
||||||
|
|
||||||
@ -85,25 +78,12 @@ Option 2: Build from source
|
|||||||
0. Install prerequisites (skip if you are already in an environment/docker with the following installed):
|
0. Install prerequisites (skip if you are already in an environment/docker with the following installed):
|
||||||
|
|
||||||
- `ROCm <https://rocm.docs.amd.com/en/latest/deploy/linux/index.html>`_
|
- `ROCm <https://rocm.docs.amd.com/en/latest/deploy/linux/index.html>`_
|
||||||
- `Pytorch <https://pytorch.org/>`_
|
- `PyTorch <https://pytorch.org/>`_
|
||||||
- `hipBLAS <https://rocm.docs.amd.com/projects/hipBLAS/en/latest/install.html>`_
|
- `hipBLAS <https://rocm.docs.amd.com/projects/hipBLAS/en/latest/install.html>`_
|
||||||
|
|
||||||
For installing PyTorch, you can start from a fresh docker image, e.g, `rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging`, `rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1`, `rocm/pytorch-nightly`.
|
For installing PyTorch, you can start from a fresh docker image, e.g., `rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging`, `rocm/pytorch-nightly`.
|
||||||
|
|
||||||
Alternatively, you can install pytorch using pytorch wheels. You can check Pytorch installation guild in Pytorch `Getting Started <https://pytorch.org/get-started/locally/>`_
|
Alternatively, you can install PyTorch using PyTorch wheels. You can check the PyTorch installation guide in PyTorch `Getting Started <https://pytorch.org/get-started/locally/>`_
|
||||||
|
|
||||||
For rocm6.0:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ pip3 install torch --index-url https://download.pytorch.org/whl/rocm6.0
|
|
||||||
|
|
||||||
|
|
||||||
For rocm5.7:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ pip install torch --index-url https://download.pytorch.org/whl/rocm5.7
|
|
||||||
|
|
||||||
|
|
||||||
1. Install `Triton flash attention for ROCm <https://github.com/ROCm/triton>`_
|
1. Install `Triton flash attention for ROCm <https://github.com/ROCm/triton>`_
|
||||||
@ -115,8 +95,6 @@ Install ROCm's Triton flash attention (the default triton-mlir branch) following
|
|||||||
Install ROCm's flash attention (v2.0.4) following the instructions from `ROCm/flash-attention <https://github.com/ROCm/flash-attention/tree/flash_attention_for_rocm#amd-gpurocm-support>`_
|
Install ROCm's flash attention (v2.0.4) following the instructions from `ROCm/flash-attention <https://github.com/ROCm/flash-attention/tree/flash_attention_for_rocm#amd-gpurocm-support>`_
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
- If you are using rocm5.7 with pytorch 2.1.0 onwards, you don't need to apply the `hipify_python.patch`. You can build the ROCm flash attention directly.
|
|
||||||
- If you fail to install `ROCm/flash-attention`, try cloning from the commit `6fd2f8e572805681cd67ef8596c7e2ce521ed3c6`.
|
|
||||||
- ROCm's Flash-attention-2 (v2.0.4) does not support sliding window attention.
|
- ROCm's Flash-attention-2 (v2.0.4) does not support sliding window attention.
|
||||||
- You might need to downgrade the "ninja" version to 1.10 if it is not used when compiling flash-attention-2 (e.g. `pip install ninja==1.10.2.4`)
|
- You might need to downgrade the "ninja" version to 1.10 if it is not used when compiling flash-attention-2 (e.g. `pip install ninja==1.10.2.4`)
|
||||||
|
|
||||||
@ -131,7 +109,6 @@ Install ROCm's flash attention (v2.0.4) following the instructions from `ROCm/fl
|
|||||||
|
|
||||||
.. tip::
|
.. tip::
|
||||||
|
|
||||||
- You may need to turn on the ``--enforce-eager`` flag if you experience process hang when running the `benchmark_thoughput.py` script to test your installation.
|
|
||||||
- Triton flash attention is used by default. For benchmarking purposes, it is recommended to run a warm up step before collecting perf numbers.
|
- Triton flash attention is used by default. For benchmarking purposes, it is recommended to run a warm up step before collecting perf numbers.
|
||||||
- To use CK flash-attention, please use this flag ``export VLLM_USE_TRITON_FLASH_ATTN=0`` to turn off triton flash attention.
|
- To use CK flash-attention or PyTorch naive attention, please use this flag ``export VLLM_USE_TRITON_FLASH_ATTN=0`` to turn off triton flash attention.
|
||||||
- The ROCm version of pytorch, ideally, should match the ROCm driver version.
|
- The ROCm version of PyTorch, ideally, should match the ROCm driver version.
|
||||||
|
|||||||
@ -1,15 +0,0 @@
|
|||||||
--- amd_hip_bf16.h 2024-02-06 18:28:58.268699142 +0000
|
|
||||||
+++ amd_hip_bf16.h.new 2024-02-06 18:28:31.988647133 +0000
|
|
||||||
@@ -90,10 +90,10 @@
|
|
||||||
#include "math_fwd.h" // ocml device functions
|
|
||||||
|
|
||||||
#if defined(__HIPCC_RTC__)
|
|
||||||
-#define __HOST_DEVICE__ __device__
|
|
||||||
+#define __HOST_DEVICE__ __device__ static
|
|
||||||
#else
|
|
||||||
#include <climits>
|
|
||||||
-#define __HOST_DEVICE__ __host__ __device__
|
|
||||||
+#define __HOST_DEVICE__ __host__ __device__ static inline
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on
|
|
||||||
Loading…
x
Reference in New Issue
Block a user