[Misc][XPU] Avoid torch compile for XPU platform (#10747)

Signed-off-by: yan ma <yan.ma@intel.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
Yan Ma 2024-12-03 01:53:55 +08:00 committed by GitHub
parent b45f0d7946
commit 519cc6ca12
2 changed files with 8 additions and 2 deletions

.buildkite/run-xpu-test.sh

@@ -12,5 +12,7 @@ remove_docker_container() { docker rm -f xpu-test || true; }
 trap remove_docker_container EXIT
 remove_docker_container
 
-# Run the image and launch offline inference
-docker run --network host --name xpu-test --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path --entrypoint="" xpu-test python3 examples/offline_inference.py
+# Run the image and test offline inference/tensor parallel
+docker run -it -d --name xpu-test --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path xpu-test /bin/bash
+docker exec xpu-test bash -c "python3 examples/offline_inference.py"
+docker exec xpu-test bash -c "python3 examples/offline_inference_cli.py -tp 2"

vllm/plugins/__init__.py

@@ -4,6 +4,7 @@ import os
 
 import torch
 
 import vllm.envs as envs
+from vllm.platforms import current_platform
 
 logger = logging.getLogger(__name__)
@@ -25,6 +26,9 @@ def load_general_plugins():
     os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1'
     # see https://github.com/vllm-project/vllm/issues/10619
     torch._inductor.config.compile_threads = 1
+    if current_platform.is_xpu():
+        # see https://github.com/pytorch/pytorch/blob/8cada5cbe5450e17c26fb8b358116785324537b2/torch/_dynamo/config.py#L158 # noqa
+        os.environ['TORCH_COMPILE_DISABLE'] = 'True'
     global plugins_loaded
     if plugins_loaded:
         return
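
For context, here is a minimal standalone sketch (illustrative only, not part of this commit; the toy add function is an assumption) of what setting TORCH_COMPILE_DISABLE does: once torch._dynamo reads the variable, torch.compile becomes a no-op and the wrapped function runs eagerly, which is the behavior this commit forces on the XPU platform.

# Minimal sketch (not from this commit): TORCH_COMPILE_DISABLE should be set
# before torch._dynamo loads its config, so export it before importing torch,
# mirroring why vLLM sets it early during plugin loading.
import os
os.environ['TORCH_COMPILE_DISABLE'] = 'True'

import torch

def add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    return x + y

# With Dynamo disabled, torch.compile returns a wrapper that simply runs
# the original function eagerly instead of tracing and compiling it.
compiled_add = torch.compile(add)
print(compiled_add(torch.ones(2), torch.ones(2)))  # tensor([2., 2.])

Placing the assignment in load_general_plugins means the variable is presumably exported in every vLLM process before the first torch.compile call fires, so no compilation is attempted on XPU.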