diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py index 6c27fedc61b17..11e098f1d7bdb 100644 --- a/vllm/model_executor/models/utils.py +++ b/vllm/model_executor/models/utils.py @@ -508,7 +508,9 @@ def merge_multimodal_embeddings( """ if isinstance(placeholder_token_id, list): placeholder_token_id = torch.tensor(placeholder_token_id, - device=input_ids.device) + pin_memory=True).to( + device=input_ids.device, + non_blocking=True) return _merge_multimodal_embeddings( inputs_embeds, torch.isin(input_ids, placeholder_token_id),