mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 10:30:37 +08:00
[Bugfix][V1] Only get input embeddings w/ multi-modal models if first PP (#17916)
Signed-off-by: Jin Huang <jinhun@amazon.com>
Co-authored-by: Jin Huang <jinhun@amazon.com>
This commit is contained in:
parent
f0d610a8ae
commit
8dd0671bac
@@ -1107,7 +1107,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
else:
|
else:
|
||||||
mm_embeds = []
|
mm_embeds = []
|
||||||
|
|
||||||
if self.is_multimodal_model:
|
if self.is_multimodal_model and get_pp_group().is_first_rank:
|
||||||
# NOTE(woosuk): To unify token ids and soft tokens (vision
|
# NOTE(woosuk): To unify token ids and soft tokens (vision
|
||||||
# embeddings), we always use embeddings (rather than token ids)
|
# embeddings), we always use embeddings (rather than token ids)
|
||||||
# as input to the multimodal model, even when the input is text.
|
# as input to the multimodal model, even when the input is text.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user