[Hardware][Intel GPU] upgrade IPEX dependency to 2.6.10. (#14564)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
parent 07b4b7a37f
commit c6e14a61ab
Dockerfile.xpu

@@ -1,4 +1,4 @@
-FROM intel/oneapi-basekit:2024.2.1-0-devel-ubuntu22.04 AS vllm-base
+FROM intel/deep-learning-essentials:2025.0.1-0-devel-ubuntu22.04 AS vllm-base
 
 RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg > /dev/null && \
     echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " | tee /etc/apt/sources.list.d/oneAPI.list && \
@@ -21,7 +21,8 @@ RUN apt-get update -y && \
     python3 \
     python3-dev \
     python3-pip \
-    # vim \
+    libze-intel-gpu-dev \
+    libze-intel-gpu1 \
     wget
 
 WORKDIR /workspace/vllm
@@ -32,19 +33,10 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     pip install --no-cache-dir \
     -r requirements/xpu.txt
 
-RUN git clone https://github.com/intel/pti-gpu && \
-    cd pti-gpu/sdk && \
-    git checkout 6c491f07a777ed872c2654ca9942f1d0dde0a082 && \
-    mkdir build && \
-    cd build && \
-    cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/icpx_toolchain.cmake -DBUILD_TESTING=OFF .. && \
-    make -j && \
-    cmake --install . --config Release --prefix "/usr/local"
-
 ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/"
 
 COPY . .
-ARG GIT_REPO_CHECK
+ARG GIT_REPO_CHECK=0
 RUN --mount=type=bind,source=.git,target=.git \
     if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh; fi
 
@@ -54,6 +46,12 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=.git,target=.git \
     python3 setup.py install
 
+# Please refer to the xpu doc: intel-extension-for-pytorch 2.6.10+xpu must be installed manually because it has conflicting dependencies with torch 2.6.0+xpu.
+# FIXME: This will be fixed in ipex 2.7; the comment is left here for awareness.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install intel-extension-for-pytorch==2.6.10+xpu \
+    --extra-index-url=https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+
 CMD ["/bin/bash"]
 
 FROM vllm-base AS vllm-openai
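For context, the updated image builds and runs the same way as before; a minimal sketch (the `vllm-xpu-env` tag and device flags follow the vLLM XPU docs and are illustrative, not part of this commit):

```console
docker build -f Dockerfile.xpu -t vllm-xpu-env --shm-size=4g .
docker run -it --rm --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path vllm-xpu-env
```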
docs: XPU installation guide

@@ -9,7 +9,7 @@ There are no pre-built wheels or images for this device, so you must build vLLM
 ## Requirements
 
 - Supported Hardware: Intel Data Center GPU, Intel ARC GPU
-- OneAPI requirements: oneAPI 2024.2
+- OneAPI requirements: oneAPI 2025.0
 
 ## Set up using Python
 
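A quick way to confirm the driver and the oneAPI 2025.0 toolkit can actually see the GPU is `sycl-ls`, shipped with the oneAPI toolkit (a sketch, not part of this commit):

```console
source /opt/intel/oneapi/setvars.sh
sycl-ls
```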
@@ -19,21 +19,27 @@ Currently, there are no pre-built XPU wheels.
 
 ### Build wheel from source
 
-- First, install required driver and intel OneAPI 2024.2 or later.
+- First, install the required driver and Intel oneAPI 2025.0 or later.
 - Second, install Python packages for vLLM XPU backend building:
 
 ```console
-source /opt/intel/oneapi/setvars.sh
 pip install --upgrade pip
 pip install -v -r requirements/xpu.txt
 ```
 
-- Finally, build and install vLLM XPU backend:
+- Then, build and install the vLLM XPU backend:
 
 ```console
 VLLM_TARGET_DEVICE=xpu python setup.py install
 ```
 
+- Finally, due to a known dependency conflict (oneAPI related) between torch-xpu 2.6 and ipex-xpu 2.6, install ipex manually. This will be fixed in ipex-xpu 2.7.
+
+```console
+pip install intel-extension-for-pytorch==2.6.10+xpu \
+    --extra-index-url=https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+```
+
 :::{note}
 - FP16 is the default data type in the current XPU backend. The BF16 data
 type is supported on Intel Data Center GPU, not supported on Intel Arc GPU yet.
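Once both wheels are in place, a one-liner can confirm that torch and ipex agree on the XPU runtime (a sketch; `torch.xpu.is_available()` is the stock PyTorch XPU probe):

```console
python -c "import torch; import intel_extension_for_pytorch as ipex; print(torch.__version__, ipex.__version__, torch.xpu.is_available())"
```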
@@ -59,7 +65,7 @@ $ docker run -it \
 
 ## Supported features
 
-XPU platform supports tensor-parallel inference/serving and also supports pipeline parallel as a beta feature for online serving. We requires Ray as the distributed runtime backend. For example, a reference execution likes following:
+The XPU platform supports **tensor parallel** inference/serving, and also supports **pipeline parallel** as a beta feature for online serving. Ray is required as the distributed runtime backend. A reference invocation looks like the following:
 
 ```console
 python -m vllm.entrypoints.openai.api_server \
@@ -73,3 +79,5 @@ python -m vllm.entrypoints.openai.api_server \
 ```
 
 By default, a Ray instance will be launched automatically if no existing one is detected in the system, with `num-gpus` equal to `parallel_config.world_size`. We recommend properly starting a Ray cluster before execution; refer to the <gh-file:examples/online_serving/run_cluster.sh> helper script.
+
+New features such as **chunked prefill**, **V1 engine support**, **LoRA**, and **MoE** land with ipex-xpu 2.6.
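The run_cluster.sh helper wraps the Ray bootstrap; the bare CLI equivalent is roughly the following (node addresses are placeholders, not taken from this commit):

```console
# on the head node
ray start --head --port=6379
# on every worker node, pointing at the head
ray start --address=<head-node-ip>:6379
```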
requirements/xpu.txt

@@ -1,7 +1,7 @@
 # Common dependencies
 -r common.txt
 
-ray >= 2.9
+ray>=2.9
 cmake>=3.26
 ninja
 packaging
@@ -9,9 +9,16 @@ setuptools-scm>=8
 setuptools>=75.8.0
 wheel
 jinja2
+datasets # for benchmark scripts
 
-torch @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp310-cp310-linux_x86_64.whl
-intel-extension-for-pytorch @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/intel_extension_for_pytorch-2.5.10%2Bgit9d489a8-cp310-cp310-linux_x86_64.whl
-oneccl_bind_pt @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp310-cp310-linux_x86_64.whl
+torch==2.6.0+xpu
+torchaudio
+torchvision
+pytorch-triton-xpu
+--extra-index-url=https://download.pytorch.org/whl/xpu
 
-triton-xpu == 3.0.0b1
+# Please refer to the xpu doc: intel-extension-for-pytorch 2.6.10+xpu must be installed manually because it has conflicting dependencies with torch 2.6.0+xpu.
+# FIXME: This will be fixed in ipex 2.7; left here for awareness.
+# intel-extension-for-pytorch==2.6.10+xpu
+oneccl_bind_pt==2.6.0+xpu
+--extra-index-url=https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
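To preview what the new pins resolve to without modifying an environment, pip's resolver can be exercised in isolation (assumes pip >= 23, which supports `--dry-run`):

```console
pip install --dry-run -r requirements/xpu.txt
```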