From c6e14a61ab90370ca924907d3298ecddbb840884 Mon Sep 17 00:00:00 2001 From: Kunshang Ji Date: Tue, 11 Mar 2025 10:11:47 -0700 Subject: [PATCH] [Hardware][Intel GPU] upgrade IPEX dependency to 2.6.10. (#14564) Signed-off-by: Kunshang Ji --- Dockerfile.xpu | 22 +++++++++---------- .../installation/gpu/xpu.inc.md | 18 ++++++++++----- requirements/xpu.txt | 17 +++++++++----- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/Dockerfile.xpu b/Dockerfile.xpu index 530809bcd4df..672a494eef99 100644 --- a/Dockerfile.xpu +++ b/Dockerfile.xpu @@ -1,4 +1,4 @@ -FROM intel/oneapi-basekit:2024.2.1-0-devel-ubuntu22.04 AS vllm-base +FROM intel/deep-learning-essentials:2025.0.1-0-devel-ubuntu22.04 AS vllm-base RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg > /dev/null && \ echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " | tee /etc/apt/sources.list.d/oneAPI.list && \ @@ -21,7 +21,8 @@ RUN apt-get update -y && \ python3 \ python3-dev \ python3-pip \ - # vim \ + libze-intel-gpu-dev \ + libze-intel-gpu1 \ wget WORKDIR /workspace/vllm @@ -32,19 +33,10 @@ RUN --mount=type=cache,target=/root/.cache/pip \ pip install --no-cache-dir \ -r requirements/xpu.txt -RUN git clone https://github.com/intel/pti-gpu && \ - cd pti-gpu/sdk && \ - git checkout 6c491f07a777ed872c2654ca9942f1d0dde0a082 && \ - mkdir build && \ - cd build && \ - cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/icpx_toolchain.cmake -DBUILD_TESTING=OFF .. && \ - make -j && \ - cmake --install . --config Release --prefix "/usr/local" - ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/" COPY . . 
-ARG GIT_REPO_CHECK +ARG GIT_REPO_CHECK=0 RUN --mount=type=bind,source=.git,target=.git \ if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh; fi @@ -54,6 +46,12 @@ RUN --mount=type=cache,target=/root/.cache/pip \ --mount=type=bind,source=.git,target=.git \ python3 setup.py install +# Please refer to the XPU doc: we need to manually install intel-extension-for-pytorch 2.6.10+xpu because there are conflicting dependencies with torch 2.6.0+xpu +# FIXME: This will be fixed in ipex 2.7. Just leaving this here for awareness. +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install intel-extension-for-pytorch==2.6.10+xpu \ + --extra-index-url=https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ + CMD ["/bin/bash"] FROM vllm-base AS vllm-openai diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/source/getting_started/installation/gpu/xpu.inc.md index 9678c25b1dd8..5a47b16f7766 100644 --- a/docs/source/getting_started/installation/gpu/xpu.inc.md +++ b/docs/source/getting_started/installation/gpu/xpu.inc.md @@ -9,7 +9,7 @@ There are no pre-built wheels or images for this device, so you must build vLLM ## Requirements - Supported Hardware: Intel Data Center GPU, Intel ARC GPU -- OneAPI requirements: oneAPI 2024.2 +- OneAPI requirements: oneAPI 2025.0 ## Set up using Python @@ -19,21 +19,27 @@ Currently, there are no pre-built XPU wheels. ### Build wheel from source -- First, install required driver and intel OneAPI 2024.2 or later. +- First, install the required driver and Intel oneAPI 2025.0 or later. 
- Second, install Python packages for vLLM XPU backend building: ```console -source /opt/intel/oneapi/setvars.sh pip install --upgrade pip pip install -v -r requirements/xpu.txt ``` -- Finally, build and install vLLM XPU backend: +- Then, build and install vLLM XPU backend: ```console VLLM_TARGET_DEVICE=xpu python setup.py install ``` +- Finally, due to a known dependency conflict (oneAPI related) between torch-xpu 2.6 and ipex-xpu 2.6, we install IPEX here. This will be fixed in ipex-xpu 2.7. + +```console +pip install intel-extension-for-pytorch==2.6.10+xpu \ + --extra-index-url=https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +``` + :::{note} - FP16 is the default data type in the current XPU backend. The BF16 data type is supported on Intel Data Center GPU, not supported on Intel Arc GPU yet. @@ -59,7 +65,7 @@ $ docker run -it \ ## Supported features -XPU platform supports tensor-parallel inference/serving and also supports pipeline parallel as a beta feature for online serving. We requires Ray as the distributed runtime backend. For example, a reference execution likes following: +XPU platform supports **tensor parallel** inference/serving and also supports **pipeline parallel** as a beta feature for online serving. We require Ray as the distributed runtime backend. For example, a reference execution looks like the following: ```console python -m vllm.entrypoints.openai.api_server \ @@ -73,3 +79,5 @@ python -m vllm.entrypoints.openai.api_server \ ``` By default, a ray instance will be launched automatically if no existing one is detected in system, with `num-gpus` equals to `parallel_config.world_size`. We recommend properly starting a ray cluster before execution, referring to the helper script. + +There are some new features coming with ipex-xpu 2.6, e.g. **chunked prefill**, **V1 engine support**, **lora**, **MoE**, etc. 
diff --git a/requirements/xpu.txt b/requirements/xpu.txt index 265205957be4..0e3252f02d35 100644 --- a/requirements/xpu.txt +++ b/requirements/xpu.txt @@ -1,7 +1,7 @@ # Common dependencies -r common.txt -ray >= 2.9 +ray>=2.9 cmake>=3.26 ninja packaging @@ -9,9 +9,16 @@ setuptools-scm>=8 setuptools>=75.8.0 wheel jinja2 +datasets # for benchmark scripts -torch @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp310-cp310-linux_x86_64.whl -intel-extension-for-pytorch @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/intel_extension_for_pytorch-2.5.10%2Bgit9d489a8-cp310-cp310-linux_x86_64.whl -oneccl_bind_pt @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp310-cp310-linux_x86_64.whl +torch==2.6.0+xpu +torchaudio +torchvision +pytorch-triton-xpu +--extra-index-url=https://download.pytorch.org/whl/xpu -triton-xpu == 3.0.0b1 +# Please refer to the XPU doc: we need to manually install intel-extension-for-pytorch 2.6.10+xpu because there are conflicting dependencies with torch 2.6.0+xpu +# FIXME: This will be fixed in ipex 2.7. just leave this here for awareness. +# intel-extension-for-pytorch==2.6.10+xpu +oneccl_bind_pt==2.6.0+xpu +--extra-index-url=https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ \ No newline at end of file