Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-11 03:45:02 +08:00)
[Core][MM] Use non-blocking CPU-GPU copy of multimodal data (#28141)
Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
This commit is contained in:
parent 43ecd0a900
commit 80679f108f
@@ -444,7 +444,9 @@ def group_mm_kwargs_by_modality(
 
 if device is not None:
 mm_kwargs_group = json_map_leaves(
-lambda x: x.to(device=device) if isinstance(x, torch.Tensor) else x,
+lambda x: x.to(device=device, non_blocking=True)
+if isinstance(x, torch.Tensor)
+else x,
 mm_kwargs_group,
 )
 else:
Loading…
x
Reference in New Issue
Block a user