From bfa2c0bbb9b4bf3f893a84f429a79c7905574aa5 Mon Sep 17 00:00:00 2001
From: Andreas Karatzas
Date: Tue, 23 Dec 2025 15:48:01 -0600
Subject: [PATCH] [ROCm][Bugfix] Fix RuntimeError in MMEncoderAttention by
 replacing .view() with .reshape() (#31203)

Signed-off-by: Andreas Karatzas
---
 tests/models/multimodal/conftest.py           | 2 +-
 vllm/attention/layers/mm_encoder_attention.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/models/multimodal/conftest.py b/tests/models/multimodal/conftest.py
index 4243298cdc896..31d99218c8276 100644
--- a/tests/models/multimodal/conftest.py
+++ b/tests/models/multimodal/conftest.py
@@ -19,7 +19,7 @@ def pytest_collection_modifyitems(config, items):
         return

     # Disable Flash/MemEfficient SDP on ROCm to avoid HF Transformers
-    # accuracy issues
+    # accuracy issues: https://github.com/vllm-project/vllm/issues/30167
     # TODO: Remove once ROCm SDP accuracy issues are resolved on HuggingFace
     torch.backends.cuda.enable_flash_sdp(False)
     torch.backends.cuda.enable_mem_efficient_sdp(False)
diff --git a/vllm/attention/layers/mm_encoder_attention.py b/vllm/attention/layers/mm_encoder_attention.py
index 25f54cc867b5a..1c1623b13f55a 100644
--- a/vllm/attention/layers/mm_encoder_attention.py
+++ b/vllm/attention/layers/mm_encoder_attention.py
@@ -136,7 +136,7 @@ class MMEncoderAttention(CustomOp):
             cu_seqlens=cu_seqlens,
         )
         if is_reshaped:
-            output = output.view(bsz, q_len, -1)
+            output = output.reshape(bsz, q_len, -1)
         return output

     def _forward_fa(
@@ -174,7 +174,7 @@ class MMEncoderAttention(CustomOp):
             fa_version=self._fa_version,
         )
         if is_reshaped:
-            output = output.view(bsz, q_len, -1)
+            output = output.reshape(bsz, q_len, -1)
         return output

     def forward_native(
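
For context, below is a minimal standalone sketch of the failure mode this patch works around: torch.Tensor.view() requires the requested shape to be compatible with the tensor's existing strides and raises a RuntimeError otherwise, while torch.Tensor.reshape() falls back to a copy. The shapes and the transpose used to produce a non-contiguous tensor are illustrative assumptions, not the actual activations inside MMEncoderAttention.

# Minimal illustration (shapes are made up; the real tensors come out of the
# SDPA / FlashAttention backends on ROCm).
import torch

bsz, q_len, num_heads, head_dim = 2, 4, 8, 64

# Transposing a (bsz, num_heads, q_len, head_dim) tensor is one common way
# to end up with a non-contiguous attention output.
output = torch.randn(bsz, num_heads, q_len, head_dim).transpose(1, 2)
assert not output.is_contiguous()

try:
    # .view() cannot merge the head and head_dim axes here because their
    # strides are incompatible with the new shape, so it raises RuntimeError.
    flat = output.view(bsz, q_len, -1)
except RuntimeError as e:
    print("view() failed:", e)

# .reshape() returns a view when possible and copies otherwise, so it
# succeeds regardless of contiguity.
flat = output.reshape(bsz, q_len, -1)
print(flat.shape)  # torch.Size([2, 4, 512])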