From 2c5ebec064bf3684c8f02b70b5963615daa81b28 Mon Sep 17 00:00:00 2001
From: Liangliang Ma <liangliang.ma@intel.com>
Date: Mon, 7 Jul 2025 16:16:40 +0800
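Subject: [PATCH] [XPU][CI] add v1/core test in xpu hardware ci (#20537)

Run the tests/v1/core pytest suite in the XPU hardware CI script after the
offline-inference example, install pytest in the XPU Docker image so the
suite can run inside the container, and remove the NotImplementedError
that XPUPlatform raised whenever a speculative config was set.

Signed-off-by: Ma, Liangliang <liangliang.ma@intel.com>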
---
 .buildkite/scripts/hardware_ci/run-xpu-test.sh | 6 ++++--
 docker/Dockerfile.xpu                          | 2 +-
 vllm/platforms/xpu.py                          | 6 +-----
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/.buildkite/scripts/hardware_ci/run-xpu-test.sh b/.buildkite/scripts/hardware_ci/run-xpu-test.sh
index cf3aaab8493b..a23abdc1ed6c 100644
--- a/.buildkite/scripts/hardware_ci/run-xpu-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-xpu-test.sh
@@ -11,8 +11,8 @@ container_name="xpu_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head
 docker build -t ${image_name} -f docker/Dockerfile.xpu .
 
 # Setup cleanup
-remove_docker_container() { 
-  docker rm -f "${container_name}" || true; 
+remove_docker_container() {
+  docker rm -f "${container_name}" || true;
   docker image rm -f "${image_name}" || true;
   docker system prune -f || true;
 }
@@ -27,4 +27,6 @@ docker run \
     "${image_name}" \
     sh -c '
     VLLM_USE_V1=1 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager
+    cd tests
+    pytest -v -s v1/core
 '
diff --git a/docker/Dockerfile.xpu b/docker/Dockerfile.xpu
index 466ba9833363..41b4c42e4c4b 100644
--- a/docker/Dockerfile.xpu
+++ b/docker/Dockerfile.xpu
@@ -47,7 +47,7 @@ FROM vllm-base AS vllm-openai
 
 # install additional dependencies for openai api server
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install accelerate hf_transfer 'modelscope!=1.15.0'
+    pip install accelerate hf_transfer pytest 'modelscope!=1.15.0'
 
 ENV VLLM_USAGE_SOURCE production-docker-image \
     TRITON_XPU_PROFILE 1
diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index de715fd894c3..39828d321ede 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -93,10 +93,6 @@ class XPUPlatform(Platform):
                     "mode.")
                 model_config.enforce_eager = True
 
-        if vllm_config.speculative_config is not None:
-            raise NotImplementedError(
-                "XPU does not support speculative decoding")
-
         if vllm_config.device_config is not None:
             assert vllm_config.device_config.device_type == "xpu"
 
@@ -181,4 +177,4 @@ class XPUPlatform(Platform):
 
     @classmethod
     def device_count(cls) -> int:
-        return torch.xpu.device_count()
\ No newline at end of file
+        return torch.xpu.device_count()