fix(gemini): use first 10 images as fileData (URLs) and remaining images as inline base64 (#10918)

This commit is contained in:
Alexander Piskun 2025-11-26 20:38:30 +02:00 committed by GitHub
parent 1105e0d139
commit 8908ee2628
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 43 additions and 18 deletions

View File

@ -58,8 +58,14 @@ class GeminiInlineData(BaseModel):
mimeType: GeminiMimeType | None = Field(None) mimeType: GeminiMimeType | None = Field(None)
class GeminiFileData(BaseModel):
fileUri: str | None = Field(None)
mimeType: GeminiMimeType | None = Field(None)
class GeminiPart(BaseModel): class GeminiPart(BaseModel):
inlineData: GeminiInlineData | None = Field(None) inlineData: GeminiInlineData | None = Field(None)
fileData: GeminiFileData | None = Field(None)
text: str | None = Field(None) text: str | None = Field(None)

View File

@ -20,6 +20,7 @@ from comfy_api.latest import IO, ComfyExtension, Input
from comfy_api.util import VideoCodec, VideoContainer from comfy_api.util import VideoCodec, VideoContainer
from comfy_api_nodes.apis.gemini_api import ( from comfy_api_nodes.apis.gemini_api import (
GeminiContent, GeminiContent,
GeminiFileData,
GeminiGenerateContentRequest, GeminiGenerateContentRequest,
GeminiGenerateContentResponse, GeminiGenerateContentResponse,
GeminiImageConfig, GeminiImageConfig,
@ -38,6 +39,7 @@ from comfy_api_nodes.util import (
get_number_of_images, get_number_of_images,
sync_op, sync_op,
tensor_to_base64_string, tensor_to_base64_string,
upload_images_to_comfyapi,
validate_string, validate_string,
video_to_base64_string, video_to_base64_string,
) )
@ -68,24 +70,43 @@ class GeminiImageModel(str, Enum):
gemini_2_5_flash_image = "gemini-2.5-flash-image" gemini_2_5_flash_image = "gemini-2.5-flash-image"
def create_image_parts(image_input: torch.Tensor) -> list[GeminiPart]: async def create_image_parts(
""" cls: type[IO.ComfyNode],
Convert image tensor input to Gemini API compatible parts. images: torch.Tensor,
image_limit: int = 0,
Args: ) -> list[GeminiPart]:
image_input: Batch of image tensors from ComfyUI.
Returns:
List of GeminiPart objects containing the encoded images.
"""
image_parts: list[GeminiPart] = [] image_parts: list[GeminiPart] = []
for image_index in range(image_input.shape[0]): if image_limit < 0:
image_as_b64 = tensor_to_base64_string(image_input[image_index].unsqueeze(0)) raise ValueError("image_limit must be greater than or equal to 0 when creating Gemini image parts.")
total_images = get_number_of_images(images)
if total_images <= 0:
raise ValueError("No images provided to create_image_parts; at least one image is required.")
# If image_limit == 0 --> use all images; otherwise clamp to image_limit.
effective_max = total_images if image_limit == 0 else min(total_images, image_limit)
# Number of images we'll send as URLs (fileData)
num_url_images = min(effective_max, 10) # Vertex API max number of image links
reference_images_urls = await upload_images_to_comfyapi(
cls,
images,
max_images=num_url_images,
)
for reference_image_url in reference_images_urls:
image_parts.append(
GeminiPart(
fileData=GeminiFileData(
mimeType=GeminiMimeType.image_png,
fileUri=reference_image_url,
)
)
)
for idx in range(num_url_images, effective_max):
image_parts.append( image_parts.append(
GeminiPart( GeminiPart(
inlineData=GeminiInlineData( inlineData=GeminiInlineData(
mimeType=GeminiMimeType.image_png, mimeType=GeminiMimeType.image_png,
data=image_as_b64, data=tensor_to_base64_string(images[idx]),
) )
) )
) )
@ -338,8 +359,7 @@ class GeminiNode(IO.ComfyNode):
# Add other modal parts # Add other modal parts
if images is not None: if images is not None:
image_parts = create_image_parts(images) parts.extend(await create_image_parts(cls, images))
parts.extend(image_parts)
if audio is not None: if audio is not None:
parts.extend(cls.create_audio_parts(audio)) parts.extend(cls.create_audio_parts(audio))
if video is not None: if video is not None:
@ -562,8 +582,7 @@ class GeminiImage(IO.ComfyNode):
image_config = GeminiImageConfig(aspectRatio=aspect_ratio) image_config = GeminiImageConfig(aspectRatio=aspect_ratio)
if images is not None: if images is not None:
image_parts = create_image_parts(images) parts.extend(await create_image_parts(cls, images))
parts.extend(image_parts)
if files is not None: if files is not None:
parts.extend(files) parts.extend(files)
@ -702,7 +721,7 @@ class GeminiImage2(IO.ComfyNode):
if images is not None: if images is not None:
if get_number_of_images(images) > 14: if get_number_of_images(images) > 14:
raise ValueError("The current maximum number of supported images is 14.") raise ValueError("The current maximum number of supported images is 14.")
parts.extend(create_image_parts(images)) parts.extend(await create_image_parts(cls, images))
if files is not None: if files is not None:
parts.extend(files) parts.extend(files)