mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-18 22:57:03 +08:00
[Bugfix] Fix QKVParallelLinearWithShardedLora bias bug (#10844)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
parent
d746268e92
commit
a4cf256159
@ -481,7 +481,6 @@ steps:
|
|||||||
|
|
||||||
- label: LoRA TP Test (Distributed)
|
- label: LoRA TP Test (Distributed)
|
||||||
num_gpus: 4
|
num_gpus: 4
|
||||||
soft_fail: true
|
|
||||||
source_file_dependencies:
|
source_file_dependencies:
|
||||||
- vllm/lora
|
- vllm/lora
|
||||||
- tests/lora
|
- tests/lora
|
||||||
|
|||||||
@ -77,13 +77,6 @@ class ColumnParallelLinearWithShardedLoRA(ColumnParallelLinearWithLoRA):
|
|||||||
add_input=True)
|
add_input=True)
|
||||||
# now have column partitioned output
|
# now have column partitioned output
|
||||||
|
|
||||||
if self.bias_stacked is not None:
|
|
||||||
self.bias_stacked = self.bias_stacked.view(
|
|
||||||
-1, self.bias_stacked.shape[-1])
|
|
||||||
self.bias_stacked = self.bias_stacked[
|
|
||||||
self.punica_wrapper.token_lora_indices]
|
|
||||||
output += self.bias_stacked
|
|
||||||
|
|
||||||
output = output.view(*out_orig_shape)
|
output = output.view(*out_orig_shape)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@ -222,7 +215,7 @@ class QKVParallelLinearWithShardedLora(QKVParallelLinearWithLora):
|
|||||||
self.punica_wrapper.add_expand(output,
|
self.punica_wrapper.add_expand(output,
|
||||||
buffer,
|
buffer,
|
||||||
self.lora_b_stacked,
|
self.lora_b_stacked,
|
||||||
self.bias_all,
|
self.bias_stacked,
|
||||||
add_input=True)
|
add_input=True)
|
||||||
# now have column partitioned output
|
# now have column partitioned output
|
||||||
output = output.view(*out_orig_shape)
|
output = output.view(*out_orig_shape)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user