mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-03 20:22:18 +08:00
[Bugfix] LoRA: Fix the order in which the kernels process LoRAs (#16040)
Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
This commit is contained in:
parent
242a637aea
commit
3a100b9278
@@ -111,7 +111,7 @@ class LoRAKernelMeta:
 
         # active_lora_ids, num_tokens_per_lora
         lora_ids, num_tokens_per_lora = torch.unique(token_lora_mapping,
-                                                     sorted=False,
+                                                     sorted=True,
                                                      return_counts=True)
         self.active_lora_ids[:lora_ids.size(0)].copy_(lora_ids,
                                                       non_blocking=True)
Loading…
x
Reference in New Issue
Block a user