mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 17:45:19 +08:00
[Core] Asynchronous h2d in merge_multimodal_embeddings via pinned memory. (#23686)
Signed-off-by: Chenheli Hua <huachenheli@outlook.com> Co-authored-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
parent
786835807b
commit
c905684cfe
@@ -508,7 +508,9 @@ def merge_multimodal_embeddings(
|
||||
"""
|
||||
if isinstance(placeholder_token_id, list):
|
||||
placeholder_token_id = torch.tensor(placeholder_token_id,
|
||||
- device=input_ids.device)
|
||||
+ pin_memory=True).to(
|
||||
+ device=input_ids.device,
|
||||
+ non_blocking=True)
|
||||
return _merge_multimodal_embeddings(
|
||||
inputs_embeds,
|
||||
torch.isin(input_ids, placeholder_token_id),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user