From a4cf2561599448d4a5c3de4d79c73ca37cb8d647 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Tue, 3 Dec 2024 12:10:29 +0800 Subject: [PATCH] [Bugfix] Fix QKVParallelLinearWithShardedLora bias bug (#10844) Signed-off-by: Jee Jee Li --- .buildkite/test-pipeline.yaml | 1 - vllm/lora/fully_sharded_layers.py | 9 +-------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index f5591f1098534..455f02a2062f1 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -481,7 +481,6 @@ steps: - label: LoRA TP Test (Distributed) num_gpus: 4 - soft_fail: true source_file_dependencies: - vllm/lora - tests/lora diff --git a/vllm/lora/fully_sharded_layers.py b/vllm/lora/fully_sharded_layers.py index 5f2d32defe030..e25e453201f01 100644 --- a/vllm/lora/fully_sharded_layers.py +++ b/vllm/lora/fully_sharded_layers.py @@ -77,13 +77,6 @@ class ColumnParallelLinearWithShardedLoRA(ColumnParallelLinearWithLoRA): add_input=True) # now have column partitioned output - if self.bias_stacked is not None: - self.bias_stacked = self.bias_stacked.view( - -1, self.bias_stacked.shape[-1]) - self.bias_stacked = self.bias_stacked[ - self.punica_wrapper.token_lora_indices] - output += self.bias_stacked - output = output.view(*out_orig_shape) return output @@ -222,7 +215,7 @@ class QKVParallelLinearWithShardedLora(QKVParallelLinearWithLora): self.punica_wrapper.add_expand(output, buffer, self.lora_b_stacked, - self.bias_all, + self.bias_stacked, add_input=True) # now have column partitioned output output = output.view(*out_orig_shape)