Fix mla prefill context performance (#13897)

Signed-off-by: ZhongYingMatrix <zhongyingmatrix@gmail.com>
This commit is contained in:
Ying Zhong 2025-03-07 01:35:49 +08:00 committed by GitHub
parent e642ec962c
commit 9f1710f1ac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 2 additions and 2 deletions

View File

@@ -1308,7 +1308,7 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
 )
 kv_c_normed = workspace[:toks]\
-[..., :self.kv_lora_rank].unsqueeze(1)
+[..., :self.kv_lora_rank]
 k_pe = workspace[:toks]\
 [..., self.kv_lora_rank:].unsqueeze(1)

View File

@@ -874,7 +874,7 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
 )
 kv_c_normed = workspace[:toks]\
-[..., :self.kv_lora_rank].unsqueeze(1)
+[..., :self.kv_lora_rank]
 k_pe = workspace[:toks]\
 [..., self.kv_lora_rank:].unsqueeze(1)