mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 11:17:07 +08:00
[Core][MM] Use non-blocking CPU-GPU copy of multimodal data (#28141)
Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
This commit is contained in:
parent
43ecd0a900
commit
80679f108f
@@ -444,7 +444,9 @@ def group_mm_kwargs_by_modality(
|
||||
|
||||
if device is not None:
|
||||
mm_kwargs_group = json_map_leaves(
|
||||
- lambda x: x.to(device=device) if isinstance(x, torch.Tensor) else x,
|
||||
+ lambda x: x.to(device=device, non_blocking=True)
|
||||
+ if isinstance(x, torch.Tensor)
|
||||
+ else x,
|
||||
mm_kwargs_group,
|
||||
)
|
||||
else:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user