mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-04 12:46:13 +08:00
Fix mla prefill context performance (#13897)
Signed-off-by: ZhongYingMatrix <zhongyingmatrix@gmail.com>
This commit is contained in:
parent
e642ec962c
commit
9f1710f1ac
@@ -1308,7 +1308,7 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
|
||||
)
|
||||
|
||||
kv_c_normed = workspace[:toks]\
|
||||
-[..., :self.kv_lora_rank].unsqueeze(1)
|
||||
+[..., :self.kv_lora_rank]
|
||||
k_pe = workspace[:toks]\
|
||||
[..., self.kv_lora_rank:].unsqueeze(1)
|
||||
|
||||
|
||||
@@ -874,7 +874,7 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
|
||||
)
|
||||
|
||||
kv_c_normed = workspace[:toks]\
|
||||
-[..., :self.kv_lora_rank].unsqueeze(1)
|
||||
+[..., :self.kv_lora_rank]
|
||||
k_pe = workspace[:toks]\
|
||||
[..., self.kv_lora_rank:].unsqueeze(1)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user