mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-31 19:37:04 +08:00
Update vllm/attention/backends/mla/utils.py
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
parent
4880a43d20
commit
aa19f297d2
@@ -161,7 +161,7 @@ class MLAImplCommon(AttentionImpl):
|
|||||||
|
|
||||||
def _v_up_proj_and_o_proj(self, x):
|
def _v_up_proj_and_o_proj(self, x):
|
||||||
if envs.VLLM_MLA_PERFORM_MATRIX_ABSORPTION:
|
if envs.VLLM_MLA_PERFORM_MATRIX_ABSORPTION:
|
||||||
return self.o_proj_absored(
|
return self.o_proj_absorbed(
|
||||||
x.reshape(-1, self.num_heads * self.kv_lora_rank))[0]
|
x.reshape(-1, self.num_heads * self.kv_lora_rank))[0]
|
||||||
else:
|
else:
|
||||||
x = torch.einsum("bnl,lnv->bnv", x, self.W_UV)
|
x = torch.einsum("bnl,lnv->bnv", x, self.W_UV)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user