From b9ce9a301341a4f128f434c874f452fb7767e94b Mon Sep 17 00:00:00 2001 From: Fanli Lin Date: Wed, 12 Nov 2025 11:13:21 +0800 Subject: [PATCH] [BugFix] Add fallback path in `apply_rotary_pos_emb_flashatt` for non-cuda platforms (#28447) Signed-off-by: Lin, Fanli --- vllm/model_executor/models/keye.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vllm/model_executor/models/keye.py b/vllm/model_executor/models/keye.py index aa0134badc40..2998c87918a9 100644 --- a/vllm/model_executor/models/keye.py +++ b/vllm/model_executor/models/keye.py @@ -346,6 +346,13 @@ def apply_rotary_pos_emb_flashatt( from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb elif current_platform.is_rocm(): from flash_attn.ops.triton.rotary import apply_rotary as apply_rotary_emb + else: + # For other platforms, use PyTorch fallback + from vllm.model_executor.layers.rotary_embedding.common import ( + apply_rotary_emb_torch, + ) + + apply_rotary_emb = partial(apply_rotary_emb_torch, is_neox_style=True) q_embed = apply_rotary_emb(q.float(), cos.float(), sin.float()).type_as(q) k_embed = apply_rotary_emb(k.float(), cos.float(), sin.float()).type_as(k)