[XPU] Whisper model support on XPU Platform (#25123)

Signed-off-by: chzhang <chaojun.zhang@intel.com>
Author: Chaojun Zhang <chaojun.zhang@intel.com>, 2025-09-18 12:30:10 +08:00 (committed by GitHub)
parent bec060fd99
commit 3bc18127ff
2 changed files with 3 additions and 3 deletions


@@ -391,8 +391,8 @@ class MultiHeadAttention(nn.Module):
             backend = _Backend.FLASH_ATTN
             use_upstream_fa = True
-        if current_platform.is_rocm():
-            # currently, only torch_sdpa is supported on rocm
+        if current_platform.is_rocm() or current_platform.is_xpu():
+            # currently, only torch_sdpa is supported on rocm/xpu
             self.attn_backend = _Backend.TORCH_SDPA
         else:

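Note: for readers unfamiliar with this code path, here is a minimal, self-contained sketch of the backend selection after this change. The _Backend and _Platform definitions below are hypothetical stubs standing in for vLLM's real _Backend enum and current_platform object, included only so the snippet runs on its own:

    from enum import Enum, auto

    class _Backend(Enum):
        # Hypothetical stub mirroring the subset of vLLM's _Backend used here.
        FLASH_ATTN = auto()
        TORCH_SDPA = auto()

    class _Platform:
        # Hypothetical stub standing in for vLLM's current_platform.
        def __init__(self, name: str) -> None:
            self.name = name

        def is_rocm(self) -> bool:
            return self.name == "rocm"

        def is_xpu(self) -> bool:
            return self.name == "xpu"

    def select_attn_backend(platform: _Platform) -> _Backend:
        # After this commit, XPU takes the same fallback path as ROCm:
        # MultiHeadAttention drops to torch SDPA because FLASH_ATTN
        # kernels are not available on these platforms.
        if platform.is_rocm() or platform.is_xpu():
            return _Backend.TORCH_SDPA
        return _Backend.FLASH_ATTN

    assert select_attn_backend(_Platform("xpu")) is _Backend.TORCH_SDPA
    assert select_attn_backend(_Platform("cuda")) is _Backend.FLASH_ATTN

Whisper's encoder attention presumably routes through MultiHeadAttention rather than the paged decoder attention, which would explain why this fallback is the piece needed for XPU support.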

@@ -282,7 +282,7 @@ def bind_kv_cache(
     # TODO - analyze where runner_kv_caches is used and the right
     # way to ensure it properly reflects multiple attention layers
     # in the same decoder block.
-    if current_platform.is_cuda():
+    if current_platform.is_cuda() or current_platform.is_xpu():
         # We know that the GPU runner is not impacted by this
         # case. Some test code depends on runner_kv_caches, but
         # not in a way that's impacted by ignoring this.
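
Note: to make this hunk's intent concrete, here is a hedged sketch of the guarded branch. The function below is a simplified stand-in, not vLLM's actual bind_kv_cache signature; the platform-name string and cache shapes are illustrative assumptions:

    import torch

    def bind_kv_cache_sketch(platform_name: str,
                             kv_caches: dict[str, torch.Tensor],
                             runner_kv_caches: list[torch.Tensor]) -> None:
        # Simplified stand-in for the guarded branch above: per-layer caches
        # are copied into the flat runner-level list only on platforms whose
        # runners are known not to be affected by the
        # multiple-attention-layers-per-block ambiguity noted in the TODO.
        # After this commit, that set is CUDA and XPU.
        if platform_name in ("cuda", "xpu"):
            runner_kv_caches.extend(kv_caches.values())

    caches = {"model.layers.0.self_attn": torch.zeros(2, 4)}
    runner: list[torch.Tensor] = []
    bind_kv_cache_sketch("xpu", caches, runner)
    assert len(runner) == 1  # XPU now behaves like CUDA here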