[Bugfix] Fix QKVParallelLinearWithShardedLora bias bug (#10844)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jee Jee Li 2024-12-03 12:10:29 +08:00 committed by GitHub
parent d746268e92
commit a4cf256159
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 1 addition and 9 deletions

View File

@@ -481,7 +481,6 @@ steps:
- label: LoRA TP Test (Distributed)
num_gpus: 4
soft_fail: true
source_file_dependencies:
- vllm/lora
- tests/lora

View File

@ -77,13 +77,6 @@ class ColumnParallelLinearWithShardedLoRA(ColumnParallelLinearWithLoRA):
add_input=True)
# now have column partitioned output
if self.bias_stacked is not None:
self.bias_stacked = self.bias_stacked.view(
-1, self.bias_stacked.shape[-1])
self.bias_stacked = self.bias_stacked[
self.punica_wrapper.token_lora_indices]
output += self.bias_stacked
output = output.view(*out_orig_shape)
return output
@@ -222,7 +215,7 @@ class QKVParallelLinearWithShardedLora(QKVParallelLinearWithLora):
self.punica_wrapper.add_expand(output,
buffer,
self.lora_b_stacked,
self.bias_all,
self.bias_stacked,
add_input=True)
# now have column partitioned output
output = output.view(*out_orig_shape)