diff --git a/vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py b/vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py
index 8717280353068..c98a426a2a1ef 100644
--- a/vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py
+++ b/vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py
@@ -59,7 +59,9 @@ class Llama4VisionRotaryEmbedding(RotaryEmbedding):
         key: Optional[torch.Tensor] = None,
     ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
         assert key is not None
-        self._match_cos_sin_cache_dtype(query)
+        # self.cos_sin_cache here is complex tensor so we cannot cast into
+        # query's dtype directly with self._match_cos_sin_cache_dtype
+        self.cos_sin_cache: torch.Tensor = self.cos_sin_cache.to(query.device)
         query_ = torch.view_as_complex(query.float().reshape(
             *query.shape[:-1], -1, 2))
         key_ = torch.view_as_complex(key.float().reshape(
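
To illustrate why the patch only moves `self.cos_sin_cache` to the query's device instead of matching its dtype, here is a minimal standalone sketch. It is not vLLM code: the shapes and variable names are hypothetical, and the rotation math is a simplified stand-in for what the real `forward_native` does with its complex-valued cache.

```python
import torch

# Hypothetical shapes, chosen only for illustration.
num_tokens, num_heads, head_dim = 4, 2, 8

query = torch.randn(num_tokens, num_heads, head_dim, dtype=torch.bfloat16)

# The vision rotary cache is stored as a complex tensor: one rotation
# factor per pair of channels, built here with torch.polar as a stand-in.
freqs = torch.randn(num_tokens, head_dim // 2, dtype=torch.float32)
cos_sin_cache = torch.polar(torch.ones_like(freqs), freqs)  # complex64

# Casting this complex cache to query.dtype (as a dtype-matching helper
# would do for a real-valued cache) is not meaningful: PyTorch warns and
# discards the imaginary part, corrupting the rotation factors. Matching
# only the device keeps the cache complex and usable:
cos_sin_cache = cos_sin_cache.to(query.device)

# Mirror the torch.view_as_complex call in the diff above: view the query
# as complex pairs, apply the rotation, then flatten back to real channels.
query_ = torch.view_as_complex(
    query.float().reshape(*query.shape[:-1], -1, 2))
rotated = torch.view_as_real(
    query_ * cos_sin_cache.unsqueeze(1)).flatten(-2)
print(rotated.dtype, rotated.shape)  # torch.float32, (4, 2, 8)
```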