[Bugfix][ROCm] Fix ViT rotary embeddings for torch.compile compatibility on ROCm (#27748)

Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
2026-01-26 15:44:30 +08:00 · 2025-11-04 09:12:19 +08:00 · 2025-11-04 09:12:19 +08:00 · b13a447546
commit b13a447546
parent 7956b0c0bc
2 changed files with 8 additions and 5 deletions
--- a/vllm/model_executor/layers/rotary_embedding/common.py
+++ b/vllm/model_executor/layers/rotary_embedding/common.py
@ -77,7 +77,11 @@ def dispatch_rotary_emb_function(
    if current_platform.is_cuda():
        return apply_rotary_emb

-    if current_platform.is_rocm():
+    # When not being compiled by torch.compile, use the rotary embedding
+    # function from the flash_attn package. Otherwise, fall through to the
+    # naive PyTorch implementation, which is faster when torch.compile is
+    # enabled.
+    if current_platform.is_rocm() and not torch.compiler.is_compiling():
        if find_spec("flash_attn") is not None:
            from flash_attn.ops.triton.rotary import apply_rotary

@ -87,11 +91,10 @@ def dispatch_rotary_emb_function(
                "flash_attn is not installed. Falling back to PyTorch "
                "implementation for rotary embeddings."
            )
-
    if default is not None:
        return default
-    else:
-        return apply_rotary_emb_torch
+
+    return apply_rotary_emb_torch


 # yarn functions
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@ -370,7 +370,7 @@ class Glm4vVisionAttention(nn.Module):
                cu_seqlens_k=cu_seqlens,
                max_seqlen_q=max_seqlen,
                max_seqlen_k=max_seqlen,
-                dropout_p=0,
+                dropout_p=0.0,
                causal=False,
            )