mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-11 02:17:05 +08:00
[Misc] Make MM embedding merge interface explicit in model runner (#21147)
Signed-off-by: Roger Wang <hey@rogerw.me>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
parent
1bf65138f6
commit
ba2dfbb0c2
@ -1328,11 +1328,10 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
# embeddings), we always use embeddings (rather than token ids)
|
# embeddings), we always use embeddings (rather than token ids)
|
||||||
# as input to the multimodal model, even when the input is text.
|
# as input to the multimodal model, even when the input is text.
|
||||||
input_ids = self.input_ids[:num_scheduled_tokens]
|
input_ids = self.input_ids[:num_scheduled_tokens]
|
||||||
if mm_embeds:
|
inputs_embeds = self.model.get_input_embeddings(
|
||||||
inputs_embeds = self.model.get_input_embeddings(
|
input_ids=input_ids,
|
||||||
input_ids, mm_embeds)
|
multimodal_embeddings=mm_embeds or None,
|
||||||
else:
|
)
|
||||||
inputs_embeds = self.model.get_input_embeddings(input_ids)
|
|
||||||
# TODO(woosuk): Avoid the copy. Optimize.
|
# TODO(woosuk): Avoid the copy. Optimize.
|
||||||
self.inputs_embeds[:num_scheduled_tokens].copy_(inputs_embeds)
|
self.inputs_embeds[:num_scheduled_tokens].copy_(inputs_embeds)
|
||||||
inputs_embeds = self.inputs_embeds[:num_input_tokens]
|
inputs_embeds = self.inputs_embeds[:num_input_tokens]
|
||||||
|
|||||||
@ -937,11 +937,10 @@ class TPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
# NOTE(woosuk): To unify token ids and soft tokens (vision
|
# NOTE(woosuk): To unify token ids and soft tokens (vision
|
||||||
# embeddings), we always use embeddings (rather than token ids)
|
# embeddings), we always use embeddings (rather than token ids)
|
||||||
# as input to the multimodal model, even when the input is text.
|
# as input to the multimodal model, even when the input is text.
|
||||||
if mm_embeds:
|
inputs_embeds = self.model.get_input_embeddings(
|
||||||
inputs_embeds = self.model.get_input_embeddings(
|
input_ids=input_ids,
|
||||||
input_ids, mm_embeds)
|
multimodal_embeddings=mm_embeds,
|
||||||
else:
|
)
|
||||||
inputs_embeds = self.model.get_input_embeddings(input_ids)
|
|
||||||
return None, inputs_embeds
|
return None, inputs_embeds
|
||||||
else:
|
else:
|
||||||
# For text-only models, we use token ids as input.
|
# For text-only models, we use token ids as input.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user