[Bugfix] Fix for issue 17396 (#18773)

Signed-off-by: Fred Reiss <frreiss@us.ibm.com>
2026-01-08 13:41:54 +08:00 · 2025-05-31 04:58:17 -07:00 · 2025-05-31 04:58:17 -07:00 · f2c3f66d59
commit f2c3f66d59
parent 0f5e0d567e
1 changed files with 5 additions and 2 deletions
--- a/vllm/lora/ops/torch_ops/lora_ops.py
+++ b/vllm/lora/ops/torch_ops/lora_ops.py
@@ -36,10 +36,13 @@ def bgmv_expand(inputs: torch.Tensor,
    if outputs.shape[0] == 1 and output_tensor.shape[0] != 1:
        limit = 1

+    # LoRA adapter and model may add different amounts of padding to output
+    common_len = min(outputs.shape[1], output_tensor.shape[1])
+
    if add_inputs:
-        output_tensor[:, :outputs.shape[1]] += outputs[:limit, :]
+        output_tensor[:, :common_len] += outputs[:limit, :common_len]
    else:
-        output_tensor[:, :outputs.shape[1]] = outputs[:limit, :]
+        output_tensor[:, :common_len] = outputs[:limit, :common_len]


 def sgmv_shrink(