mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 09:55:57 +08:00
[Core] Asynchronous h2d in merge_multimodal_embeddings via pinned memory. (#23686)
Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
parent
786835807b
commit
c905684cfe
@@ -508,7 +508,9 @@ def merge_multimodal_embeddings(
|
|||||||
"""
|
"""
|
||||||
if isinstance(placeholder_token_id, list):
|
if isinstance(placeholder_token_id, list):
|
||||||
placeholder_token_id = torch.tensor(placeholder_token_id,
|
placeholder_token_id = torch.tensor(placeholder_token_id,
|
||||||
device=input_ids.device)
|
pin_memory=True).to(
|
||||||
|
device=input_ids.device,
|
||||||
|
non_blocking=True)
|
||||||
return _merge_multimodal_embeddings(
|
return _merge_multimodal_embeddings(
|
||||||
inputs_embeds,
|
inputs_embeds,
|
||||||
torch.isin(input_ids, placeholder_token_id),
|
torch.isin(input_ids, placeholder_token_id),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user