[V1][VLM] Fix edge case bug for InternVL2 (#11165)

Signed-off-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
Roger Wang 2024-12-13 03:09:30 -08:00 committed by GitHub
parent eeec9e3390
commit 969da7d70b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -669,8 +669,11 @@ class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP):
image_embeds = self.extract_feature(image_input["data"])
patches_per_image = image_input["patches_per_image"]
# Only one image in the current batch
if len(patches_per_image) == 1:
-image_embeds = image_embeds.unsqueeze(0)
+image_embeds = image_embeds.view(
+-1, self.config.text_config.hidden_size).unsqueeze(0)
return image_embeds
# NOTE: Image embeddings are split into separate tensors for each image