From dbbe0c756a41e5a64d6e364c131fd7d12a56b926 Mon Sep 17 00:00:00 2001
From: Fanli Lin
Date: Thu, 13 Nov 2025 13:31:42 +0800
Subject: [PATCH] [XPU] Support Triton path for LoRA operations on XPU (#28511)

Signed-off-by: Fanli Lin
---
 vllm/lora/ops/triton_ops/lora_expand_op.py | 1 +
 vllm/lora/ops/triton_ops/lora_shrink_op.py | 1 +
 vllm/platforms/xpu.py                      | 6 +++++-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/vllm/lora/ops/triton_ops/lora_expand_op.py b/vllm/lora/ops/triton_ops/lora_expand_op.py
index 7f7d70cdc3a4..311c4b191859 100644
--- a/vllm/lora/ops/triton_ops/lora_expand_op.py
+++ b/vllm/lora/ops/triton_ops/lora_expand_op.py
@@ -48,6 +48,7 @@ def _lora_expand_kernel(
     SLICE_NUM: tl.constexpr,
     SAME_STRIDE: tl.constexpr,
     USE_GDC: tl.constexpr,
+    launch_pdl: tl.constexpr,
 ):
     cta_n_num = tl.cdiv(N, BLOCK_N)
     cta_m_num = tl.cdiv(M, BLOCK_M)
diff --git a/vllm/lora/ops/triton_ops/lora_shrink_op.py b/vllm/lora/ops/triton_ops/lora_shrink_op.py
index e78379cf684a..71bd5e361466 100644
--- a/vllm/lora/ops/triton_ops/lora_shrink_op.py
+++ b/vllm/lora/ops/triton_ops/lora_shrink_op.py
@@ -46,6 +46,7 @@ def _lora_shrink_kernel(
     GROUP_SIZE_M: tl.constexpr,
     SLICE_NUM: tl.constexpr,
     USE_GDC: tl.constexpr,
+    launch_pdl: tl.constexpr,
 ):
     cta_n_num = tl.cdiv(N, BLOCK_N)
     cta_m_num = tl.cdiv(M, BLOCK_M)
diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index 0309ae0fe962..c629325f76a3 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -101,7 +101,11 @@ class XPUPlatform(Platform):
 
     @classmethod
     def get_punica_wrapper(cls) -> str:
-        return "vllm.lora.punica_wrapper.punica_xpu.PunicaWrapperXPU"
+        xpu_use_triton_kernel = os.getenv("XPU_USE_TRITON_KERNEL", "0") == "1"
+        if not xpu_use_triton_kernel:
+            return "vllm.lora.punica_wrapper.punica_xpu.PunicaWrapperXPU"
+        else:
+            return "vllm.lora.punica_wrapper.punica_gpu.PunicaWrapperGPU"
 
     @classmethod
     def get_device_total_memory(cls, device_id: int = 0) -> int: