From f2c3f66d59f9e38aa94985b54f370219222e7bd1 Mon Sep 17 00:00:00 2001
From: Fred Reiss <frreiss@us.ibm.com>
Date: Sat, 31 May 2025 04:58:17 -0700
Subject: [PATCH] [Bugfix] Fix for issue 17396 (#18773)

Signed-off-by: Fred Reiss <frreiss@us.ibm.com>
---
 vllm/lora/ops/torch_ops/lora_ops.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/vllm/lora/ops/torch_ops/lora_ops.py b/vllm/lora/ops/torch_ops/lora_ops.py
index af79f98415cbc..ab65faceb2c10 100644
--- a/vllm/lora/ops/torch_ops/lora_ops.py
+++ b/vllm/lora/ops/torch_ops/lora_ops.py
@@ -36,10 +36,13 @@ def bgmv_expand(inputs: torch.Tensor,
     if outputs.shape[0] == 1 and output_tensor.shape[0] != 1:
         limit = 1
 
+    # LoRA adapter and model may add different amounts of padding to output
+    common_len = min(outputs.shape[1], output_tensor.shape[1])
+
     if add_inputs:
-        output_tensor[:, :outputs.shape[1]] += outputs[:limit, :]
+        output_tensor[:, :common_len] += outputs[:limit, :common_len]
     else:
-        output_tensor[:, :outputs.shape[1]] = outputs[:limit, :]
+        output_tensor[:, :common_len] = outputs[:limit, :common_len]
 
 
 def sgmv_shrink(