From f34eca5f0141088fef5b81a933f9869e1a04f188 Mon Sep 17 00:00:00 2001 From: TJian Date: Wed, 17 Dec 2025 07:32:43 +0800 Subject: [PATCH] [ROCm] [Bugfix] Fix torch sdpa hallucination (#30789) Signed-off-by: tjtanaa (cherry picked from commit 2410132bb1f9faa5b252fad3f2b83dc926946b08) --- vllm/attention/ops/vit_attn_wrappers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vllm/attention/ops/vit_attn_wrappers.py b/vllm/attention/ops/vit_attn_wrappers.py index 46c7d83dfa5c2..892c4209c01e0 100644 --- a/vllm/attention/ops/vit_attn_wrappers.py +++ b/vllm/attention/ops/vit_attn_wrappers.py @@ -16,6 +16,7 @@ import einops import torch import torch.nn.functional as F +from vllm.platforms import current_platform from vllm.utils.torch_utils import direct_register_custom_op @@ -89,6 +90,13 @@ def torch_sdpa_wrapper( v: torch.Tensor, cu_seqlens: torch.Tensor, ) -> torch.Tensor: + # Never remove the contiguous logic for ROCm + # Without it, hallucinations occur with the backend + if current_platform.is_rocm(): + q = q.contiguous() + k = k.contiguous() + v = v.contiguous() + outputs = [] lens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()