[Core] Asynchronous h2d in merge_multimodal_embeddings via pinned memory. (#23686)

Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Chenheli Hua 2025-08-26 20:05:34 -07:00 committed by GitHub
parent 786835807b
commit c905684cfe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -508,7 +508,9 @@ def merge_multimodal_embeddings(
"""
if isinstance(placeholder_token_id, list):
placeholder_token_id = torch.tensor(placeholder_token_id,
- device=input_ids.device)
+ pin_memory=True).to(
+ device=input_ids.device,
+ non_blocking=True)
return _merge_multimodal_embeddings(
inputs_embeds,
torch.isin(input_ids, placeholder_token_id),