mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 13:35:48 +08:00
[FIXBUG] Qwen3VL hallucinations without Contiguous on Torch.SDPA (#27744)
Signed-off-by: JartX <sagformas@epdcenter.es> Co-authored-by: Lukas Geiger <lukas.geiger94@gmail.com>
This commit is contained in:
parent
1da3309ace
commit
7568a282b9
@@ -428,6 +428,14 @@ class Qwen2_5_VisionAttention(nn.Module):
|
|||||||
)
|
)
|
||||||
elif self.attn_backend == _Backend.TORCH_SDPA:
|
elif self.attn_backend == _Backend.TORCH_SDPA:
|
||||||
# Execute attention entry by entry for speed & less VRAM.
|
# Execute attention entry by entry for speed & less VRAM.
|
||||||
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
|
# Never remove the next contiguous logic
|
||||||
|
# Without it, hallucinations occur with the backend
|
||||||
|
if current_platform.is_rocm():
|
||||||
|
q = q.contiguous()
|
||||||
|
k = k.contiguous()
|
||||||
|
v = v.contiguous()
|
||||||
outputs = []
|
outputs = []
|
||||||
for i in range(1, len(cu_seqlens)):
|
for i in range(1, len(cu_seqlens)):
|
||||||
start_idx = cu_seqlens[i - 1]
|
start_idx = cu_seqlens[i - 1]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user