diff --git a/vllm/model_executor/layers/rotary_embedding.py b/vllm/model_executor/layers/rotary_embedding.py index 00f4e66bd13e0..b179a0f00be2e 100644 --- a/vllm/model_executor/layers/rotary_embedding.py +++ b/vllm/model_executor/layers/rotary_embedding.py @@ -32,6 +32,9 @@ from transformers import PretrainedConfig from vllm.model_executor.custom_op import CustomOp from vllm.platforms import current_platform +if current_platform.is_cuda_alike(): + from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb + def _rotate_neox(x: torch.Tensor) -> torch.Tensor: x1 = x[..., :x.shape[-1] // 2] @@ -78,7 +81,6 @@ def _apply_rotary_emb(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor, positional embeddings. """ if current_platform.is_cuda_alike(): - from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb return apply_rotary_emb(x.unsqueeze(0), cos, sin, not is_neox_style).squeeze(0) else: