mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 17:45:19 +08:00
faster startup of vLLM (#982)
* update

---------

Co-authored-by: Robert Irvine <robert@seamlessml.com>
This commit is contained in:
parent
852ef5b4f5
commit
4b5bcf8906
@@ -259,8 +259,9 @@ class PagedAttentionWithRoPE(PagedAttention):
|
||||
self.is_neox_style = is_neox_style
|
||||
|
||||
# Create the cos and sin cache.
|
||||
- inv_freq = 1.0 / (base**(torch.arange(0, rotary_dim, 2) / rotary_dim))
|
||||
- t = torch.arange(max_position).float()
|
||||
+ inv_freq = 1.0 / (base**(
|
||||
+ torch.arange(0, rotary_dim, 2, device="cuda") / rotary_dim))
|
||||
+ t = torch.arange(max_position, device="cuda").float()
|
||||
freqs = torch.einsum("i,j -> ij", t, inv_freq.float())
|
||||
cos = freqs.cos()
|
||||
sin = freqs.sin()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user