mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-08 13:41:54 +08:00
[Bugfix] Fix for issue 17396 (#18773)
Signed-off-by: Fred Reiss <frreiss@us.ibm.com>
This commit is contained in:
parent
0f5e0d567e
commit
f2c3f66d59
@@ -36,10 +36,13 @@ def bgmv_expand(inputs: torch.Tensor,
|
||||
if outputs.shape[0] == 1 and output_tensor.shape[0] != 1:
|
||||
limit = 1
|
||||
|
||||
# LoRA adapter and model may add different amounts of padding to output
|
||||
common_len = min(outputs.shape[1], output_tensor.shape[1])
|
||||
|
||||
if add_inputs:
|
||||
output_tensor[:, :outputs.shape[1]] += outputs[:limit, :]
|
||||
output_tensor[:, :common_len] += outputs[:limit, :common_len]
|
||||
else:
|
||||
output_tensor[:, :outputs.shape[1]] = outputs[:limit, :]
|
||||
output_tensor[:, :common_len] = outputs[:limit, :common_len]
|
||||
|
||||
|
||||
def sgmv_shrink(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user