From f2c3f66d59f9e38aa94985b54f370219222e7bd1 Mon Sep 17 00:00:00 2001
From: Fred Reiss
Date: Sat, 31 May 2025 04:58:17 -0700
Subject: [PATCH] [Bugfix] Fix for issue 17396 (#18773)

Signed-off-by: Fred Reiss
---
 vllm/lora/ops/torch_ops/lora_ops.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/vllm/lora/ops/torch_ops/lora_ops.py b/vllm/lora/ops/torch_ops/lora_ops.py
index af79f98415cbc..ab65faceb2c10 100644
--- a/vllm/lora/ops/torch_ops/lora_ops.py
+++ b/vllm/lora/ops/torch_ops/lora_ops.py
@@ -36,10 +36,13 @@ def bgmv_expand(inputs: torch.Tensor,
     if outputs.shape[0] == 1 and output_tensor.shape[0] != 1:
         limit = 1
 
+    # LoRA adapter and model may add different amounts of padding to output
+    common_len = min(outputs.shape[1], output_tensor.shape[1])
+
     if add_inputs:
-        output_tensor[:, :outputs.shape[1]] += outputs[:limit, :]
+        output_tensor[:, :common_len] += outputs[:limit, :common_len]
     else:
-        output_tensor[:, :outputs.shape[1]] = outputs[:limit, :]
+        output_tensor[:, :common_len] = outputs[:limit, :common_len]
 
 
 def sgmv_shrink(