mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-19 05:35:01 +08:00
[Attention] Remove slow setattr in MLA (#14769)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
parent
02fcaa3d0a
commit
d47807ba08
@@ -161,8 +161,13 @@ class RotaryEmbedding(CustomOp):
|
|||||||
) -> Tuple[torch.Tensor, torch.Tensor]:
|
) -> Tuple[torch.Tensor, torch.Tensor]:
|
||||||
from vllm import _custom_ops as ops
|
from vllm import _custom_ops as ops
|
||||||
|
|
||||||
|
# __setattr__ in nn.Module (called by `self.cos_sin_cache = ...`)
|
||||||
|
# is expensive, so avoid calling it if possible
|
||||||
|
if self.cos_sin_cache.device != query.device or \
|
||||||
|
self.cos_sin_cache.dtype != query.dtype:
|
||||||
self.cos_sin_cache = self.cos_sin_cache.to(query.device,
|
self.cos_sin_cache = self.cos_sin_cache.to(query.device,
|
||||||
dtype=query.dtype)
|
dtype=query.dtype)
|
||||||
|
|
||||||
# ops.rotary_embedding()/batched_rotary_embedding()
|
# ops.rotary_embedding()/batched_rotary_embedding()
|
||||||
# are in-place operations that update the query and key tensors.
|
# are in-place operations that update the query and key tensors.
|
||||||
if offsets is not None:
|
if offsets is not None:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user