[XPU] Update latest IPEX 2.8 release (#27735)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
Author: Kunshang Ji, 2025-10-30 11:17:13 +08:00 (committed by GitHub)
parent d7fb10c574
commit b5bae42f91
4 changed files with 14 additions and 20 deletions


@@ -20,7 +20,10 @@ trap remove_docker_container EXIT
 # Run the image and test offline inference/tensor parallel
 docker run \
-  --device /dev/dri \
+  --device /dev/dri:/dev/dri \
   --net=host \
+  --ipc=host \
+  --privileged \
+  -v /dev/dri/by-path:/dev/dri/by-path \
   --entrypoint="" \
   -e "HF_TOKEN=${HF_TOKEN}" \
@@ -42,7 +45,7 @@ docker run \
   pytest -v -s v1/sample --ignore=v1/sample/test_logprobs.py --ignore=v1/sample/test_logprobs_e2e.py
   pytest -v -s v1/worker --ignore=v1/worker/test_gpu_model_runner.py
   pytest -v -s v1/structured_output
-  pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_tree_attention.py
+  pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_tree_attention.py --ignore=v1/spec_decode/test_speculators_eagle3.py
   pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_shared_storage_connector.py
   pytest -v -s v1/test_serial_utils.py
 '
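Not part of the diff, but a quick way to confirm the remapped `--device /dev/dri:/dev/dri` and `-v /dev/dri/by-path` mounts actually expose the GPUs inside the test container; a minimal sketch assuming a PyTorch 2.8 XPU build is installed:

```python
# Hypothetical sanity check, not from this commit: verify the container sees
# the Intel GPUs exposed through /dev/dri after the new docker run flags.
import torch

print(torch.xpu.is_available())  # expect True when /dev/dri is mapped correctly
print(torch.xpu.device_count())  # expect one entry per exposed render device
```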


@@ -56,8 +56,10 @@ docker build -f docker/Dockerfile.xpu -t vllm-xpu-env --shm-size=4g .
 docker run -it \
   --rm \
   --network=host \
-  --device /dev/dri \
+  --device /dev/dri:/dev/dri \
+  -v /dev/dri/by-path:/dev/dri/by-path \
   --ipc=host \
+  --privileged \
   vllm-xpu-env
 ```
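The docs snippet mirrors the CI change above. As a follow-up check inside `vllm-xpu-env`, an offline-inference smoke test along these lines should exercise the XPU path; the model choice is illustrative only, any small HF model would do:

```python
# Hedged smoke test for the dev container, not part of this commit.
from vllm import LLM, SamplingParams

llm = LLM(model="facebook/opt-125m")
outputs = llm.generate(["Hello, XPU!"], SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)
```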


@@ -15,4 +15,4 @@ torchaudio
 torchvision
 --extra-index-url=https://download.pytorch.org/whl/xpu
-intel-extension-for-pytorch @ https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.8.10.post0%2Bxpu-cp312-cp312-linux_x86_64.whl
+intel-extension-for-pytorch @ https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.8.10.post1%2Bxpu-cp312-cp312-linux_x86_64.whl
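The wheel bump is post0 → post1 of IPEX 2.8.10 for CPython 3.12. A quick way to confirm an environment picked up the new build, assuming the standard version attributes:

```python
# Assumed verification step, not part of the commit.
import torch
import intel_extension_for_pytorch as ipex

print(torch.__version__)  # expect a 2.8.x XPU build
print(ipex.__version__)   # expect 2.8.10.post1+xpu after this update
```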


@@ -151,7 +151,9 @@ class ipex_ops:
     def rms_norm(
         input: torch.Tensor, weight: torch.Tensor, epsilon: float
     ) -> torch.Tensor:
-        return ipex.llm.functional.rms_norm(input, weight, epsilon)
+        out = torch.empty_like(input)
+        torch.ops.torch_ipex.rms_norm_vllm(out, input.contiguous(), weight, epsilon)
+        return out

     @staticmethod
     def fused_add_rms_norm(
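The `rms_norm` path moves from the `ipex.llm.functional` wrapper to the out-variant `torch.ops.torch_ipex.rms_norm_vllm` kernel shipped with post1. For intuition, a plain-PyTorch reference of what the kernel is expected to compute; `rms_norm_ref` is a name introduced here, and the kernel's exact accumulation dtype is an assumption:

```python
import torch

def rms_norm_ref(x: torch.Tensor, weight: torch.Tensor, eps: float) -> torch.Tensor:
    # RMSNorm: scale each row by the reciprocal root-mean-square of its last
    # dimension (computed in fp32 for stability), then apply the learned weight.
    rms = torch.rsqrt(x.float().pow(2).mean(dim=-1, keepdim=True) + eps)
    return (x.float() * rms).to(x.dtype) * weight
```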
@@ -160,10 +162,7 @@
         weight: torch.Tensor,
         epsilon: float,
     ) -> None:
-        tmp = ipex.llm.functional.add_rms_norm(
-            residual, input, weight, None, epsilon, True
-        )
-        input.copy_(tmp)
+        torch.ops.torch_ipex.fused_add_rms_norm_vllm(input, residual, weight, epsilon)

     @staticmethod
     def varlen_attention(
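`fused_add_rms_norm` likewise drops the `add_rms_norm(...)` plus `input.copy_(tmp)` round-trip for a single in-place op. A reference sketch of the assumed contract, matching the usual vLLM fused kernel where `residual` accumulates the input and `input` receives the normalized result:

```python
import torch

def fused_add_rms_norm_ref(
    input: torch.Tensor, residual: torch.Tensor, weight: torch.Tensor, eps: float
) -> None:
    # Assumed in-place semantics: residual <- input + residual,
    # then input <- RMSNorm(residual) * weight.
    residual.add_(input)
    rms = torch.rsqrt(residual.float().pow(2).mean(dim=-1, keepdim=True) + eps)
    input.copy_((residual.float() * rms).to(input.dtype) * weight)
```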
@@ -296,16 +295,6 @@ class ipex_ops:
         num_splits=0,
         s_aux: torch.Tensor | None = None,
     ):
-        if cu_seqlens_k is None:
-            # cu_seqlens_k is not used in ipex kernel.
-            cu_seqlens_k = torch.cumsum(seqused_k, dim=0)
-            cu_seqlens_k = torch.cat(
-                [
-                    torch.tensor([0], device=seqused_k.device, dtype=torch.int32),
-                    cu_seqlens_k,
-                ]
-            ).to(torch.int32)
         real_window_size: tuple[int, int]
         if window_size is None:
             real_window_size = (-1, -1)
@@ -318,7 +307,7 @@
             k,
             v,
             cu_seqlens_q,
-            cu_seqlens_k,
+            seqused_k,
             max_seqlen_q,
             max_seqlen_k,
             softmax_scale,
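The deleted block was a host-side fallback that turned per-sequence used K lengths into cumulative offsets; with post1 the flash-attention entry point accepts `seqused_k` directly, so the conversion (a couple of small kernel launches per call) goes away. For reference, the relationship the fallback encoded, with a helper name introduced here:

```python
import torch

def cu_seqlens_from_seqused(seqused_k: torch.Tensor) -> torch.Tensor:
    # Cumulative offsets [0, s0, s0+s1, ...] as int32, exactly what the old
    # Python fallback built before handing cu_seqlens_k to the kernel.
    zero = torch.zeros(1, dtype=torch.int32, device=seqused_k.device)
    return torch.cat([zero, seqused_k.cumsum(0).to(torch.int32)])
```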