mirror of
https://git.datalinker.icu/comfyanonymous/ComfyUI
synced 2025-12-08 21:44:33 +08:00
513 lines
18 KiB
Python
513 lines
18 KiB
Python
import base64
|
|
from io import BytesIO
|
|
|
|
from typing_extensions import override
|
|
|
|
from comfy_api.latest import IO, ComfyExtension, Input, InputImpl
|
|
from comfy_api_nodes.apis.veo_api import (
|
|
VeoGenVidPollRequest,
|
|
VeoGenVidPollResponse,
|
|
VeoGenVidRequest,
|
|
VeoGenVidResponse,
|
|
VeoRequestInstance,
|
|
VeoRequestInstanceImage,
|
|
VeoRequestParameters,
|
|
)
|
|
from comfy_api_nodes.util import (
|
|
ApiEndpoint,
|
|
download_url_to_video_output,
|
|
poll_op,
|
|
sync_op,
|
|
tensor_to_base64_string,
|
|
)
|
|
|
|
# Rough wall-clock estimate (seconds) for one Veo generation; passed to poll_op
# as estimated_duration so the UI can show a sensible progress estimate.
AVERAGE_DURATION_VIDEO_GEN = 32

# Maps the UI-facing model names (the combo options on the nodes below) to the
# identifiers the Veo proxy endpoint expects. Note the 3.1 entries resolve to
# "-preview" API model names; the others pass through unchanged.
MODELS_MAP = {
    "veo-2.0-generate-001": "veo-2.0-generate-001",
    "veo-3.1-generate": "veo-3.1-generate-preview",
    "veo-3.1-fast-generate": "veo-3.1-fast-generate-preview",
    "veo-3.0-generate-001": "veo-3.0-generate-001",
    "veo-3.0-fast-generate-001": "veo-3.0-fast-generate-001",
}
|
|
|
|
|
|
class VeoVideoGenerationNode(IO.ComfyNode):
    """
    Generates videos from text prompts using Google's Veo API.

    This node can create videos from text descriptions and optional image inputs,
    with control over parameters like aspect ratio, duration, and more.
    """

    @classmethod
    def define_schema(cls):
        """Declare the node's UI schema (inputs, output video, hidden auth fields)."""
        return IO.Schema(
            node_id="VeoVideoGenerationNode",
            display_name="Google Veo 2 Video Generation",
            category="api node/video/Veo",
            description="Generates videos from text prompts using Google's Veo 2 API",
            inputs=[
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text description of the video",
                ),
                IO.Combo.Input(
                    "aspect_ratio",
                    options=["16:9", "9:16"],
                    default="16:9",
                    tooltip="Aspect ratio of the output video",
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative text prompt to guide what to avoid in the video",
                    optional=True,
                ),
                IO.Int.Input(
                    "duration_seconds",
                    default=5,
                    min=5,
                    max=8,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                IO.Boolean.Input(
                    "enhance_prompt",
                    default=True,
                    tooltip="Whether to enhance the prompt with AI assistance",
                    optional=True,
                ),
                IO.Combo.Input(
                    "person_generation",
                    options=["ALLOW", "BLOCK"],
                    default="ALLOW",
                    tooltip="Whether to allow generating people in the video",
                    optional=True,
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=0xFFFFFFFF,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                IO.Image.Input(
                    "image",
                    tooltip="Optional reference image to guide video generation",
                    optional=True,
                ),
                IO.Combo.Input(
                    "model",
                    options=["veo-2.0-generate-001"],
                    default="veo-2.0-generate-001",
                    tooltip="Veo 2 model to use for video generation",
                    optional=True,
                ),
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        prompt,
        aspect_ratio="16:9",
        negative_prompt="",
        duration_seconds=5,
        enhance_prompt=True,
        person_generation="ALLOW",
        seed=0,
        image=None,
        model="veo-2.0-generate-001",
        generate_audio=False,
    ):
        """Submit a Veo generation request and poll until a video is returned.

        Args:
            prompt: Text description of the desired video.
            aspect_ratio: Output aspect ratio ("16:9" or "9:16").
            negative_prompt: What to avoid; omitted from the request when empty.
            duration_seconds: Requested clip length in seconds.
            enhance_prompt: Whether the API should enhance the prompt.
            person_generation: "ALLOW" or "BLOCK" people in the output.
            seed: Generation seed; 0 means "API chooses" and is not sent.
            image: Optional reference image tensor; sent as base64 PNG when given.
            model: UI model name; must be a key of MODELS_MAP.
            generate_audio: Forwarded only for Veo 3 models (Veo 2 lacks audio).

        Returns:
            IO.NodeOutput wrapping the generated video.

        Raises:
            Exception: On API errors, Responsible-AI filtering, or a missing
                video payload in the completed operation.
        """
        # Translate the UI-facing model name into the API identifier.
        model = MODELS_MAP[model]

        instance = {"prompt": prompt}

        # Attach the optional reference image as an inline base64 PNG.
        if image is not None:
            image_base64 = tensor_to_base64_string(image)
            if image_base64:
                instance["image"] = {"bytesBase64Encoded": image_base64, "mimeType": "image/png"}

        parameters = {
            "aspectRatio": aspect_ratio,
            "personGeneration": person_generation,
            "durationSeconds": duration_seconds,
            "enhancePrompt": enhance_prompt,
        }
        # Optional parameters are omitted entirely when unset so the API applies its defaults.
        if negative_prompt:
            parameters["negativePrompt"] = negative_prompt
        if seed > 0:
            parameters["seed"] = seed
        # Only add generateAudio for Veo 3 models; Veo 2 requests must not include it.
        if "veo-2.0" not in model:
            parameters["generateAudio"] = generate_audio

        initial_response = await sync_op(
            cls,
            ApiEndpoint(path=f"/proxy/veo/{model}/generate", method="POST"),
            response_model=VeoGenVidResponse,
            data=VeoGenVidRequest(
                instances=[instance],
                parameters=parameters,
            ),
        )

        def status_extractor(response):
            # Only return "completed" if the operation is done, regardless of success or failure.
            # Errors are inspected after polling so we can raise a precise message.
            return "completed" if response.done else "pending"

        poll_response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/veo/{model}/poll", method="POST"),
            response_model=VeoGenVidPollResponse,
            status_extractor=status_extractor,
            data=VeoGenVidPollRequest(
                operationName=initial_response.name,
            ),
            poll_interval=5.0,
            estimated_duration=AVERAGE_DURATION_VIDEO_GEN,
        )

        # Surface API-level errors from the completed operation first.
        if poll_response.error:
            raise Exception(f"Veo API error: {poll_response.error.message} (code: {poll_response.error.code})")

        # Surface Responsible-AI filtering, including the first reason when provided.
        # getattr defaults keep this safe when the response lacks the RAI fields.
        response = poll_response.response
        filtered_count = getattr(response, "raiMediaFilteredCount", None)
        if filtered_count:
            reasons = getattr(response, "raiMediaFilteredReasons", None)
            if reasons:
                reason = reasons[0]
                error_message = f"Content filtered by Google's Responsible AI practices: {reason} ({filtered_count} videos filtered.)"
            else:
                error_message = f"Content filtered by Google's Responsible AI practices ({filtered_count} videos filtered.)"
            raise Exception(error_message)

        # Extract the first returned video; it may arrive inline (base64) or as a URL.
        videos = getattr(response, "videos", None) if response else None
        if videos:
            video = videos[0]
            if getattr(video, "bytesBase64Encoded", None):
                return IO.NodeOutput(InputImpl.VideoFromFile(BytesIO(base64.b64decode(video.bytesBase64Encoded))))
            if getattr(video, "gcsUri", None):
                return IO.NodeOutput(await download_url_to_video_output(video.gcsUri))
            raise Exception("Video returned but no data or URL was provided")
        raise Exception("Video generation completed but no video was returned")
|
|
|
|
|
|
class Veo3VideoGenerationNode(VeoVideoGenerationNode):
    """Text-to-video node backed by Google's Veo 3 model family.

    Supported models:
    - veo-3.0-generate-001
    - veo-3.0-fast-generate-001

    Compared with the base Veo 2 node this adds optional audio generation and
    pins the duration to the 8 seconds Veo 3 requires. The request/poll logic
    is inherited unchanged from VeoVideoGenerationNode.execute.
    """

    @classmethod
    def define_schema(cls):
        """Build the node schema; inputs mirror the Veo 2 node plus Veo 3 extras."""
        model_options = [
            "veo-3.1-generate",
            "veo-3.1-fast-generate",
            "veo-3.0-generate-001",
            "veo-3.0-fast-generate-001",
        ]
        node_inputs = [
            IO.String.Input(
                "prompt",
                multiline=True,
                default="",
                tooltip="Text description of the video",
            ),
            IO.Combo.Input(
                "aspect_ratio",
                options=["16:9", "9:16"],
                default="16:9",
                tooltip="Aspect ratio of the output video",
            ),
            IO.String.Input(
                "negative_prompt",
                multiline=True,
                default="",
                tooltip="Negative text prompt to guide what to avoid in the video",
                optional=True,
            ),
            # Veo 3 accepts exactly one duration, hence min == max == default == 8.
            IO.Int.Input(
                "duration_seconds",
                default=8,
                min=8,
                max=8,
                step=1,
                display_mode=IO.NumberDisplay.number,
                tooltip="Duration of the output video in seconds (Veo 3 only supports 8 seconds)",
                optional=True,
            ),
            IO.Boolean.Input(
                "enhance_prompt",
                default=True,
                tooltip="Whether to enhance the prompt with AI assistance",
                optional=True,
            ),
            IO.Combo.Input(
                "person_generation",
                options=["ALLOW", "BLOCK"],
                default="ALLOW",
                tooltip="Whether to allow generating people in the video",
                optional=True,
            ),
            IO.Int.Input(
                "seed",
                default=0,
                min=0,
                max=0xFFFFFFFF,
                step=1,
                display_mode=IO.NumberDisplay.number,
                control_after_generate=True,
                tooltip="Seed for video generation (0 for random)",
                optional=True,
            ),
            IO.Image.Input(
                "image",
                tooltip="Optional reference image to guide video generation",
                optional=True,
            ),
            IO.Combo.Input(
                "model",
                options=model_options,
                default="veo-3.0-generate-001",
                tooltip="Veo 3 model to use for video generation",
                optional=True,
            ),
            IO.Boolean.Input(
                "generate_audio",
                default=False,
                tooltip="Generate audio for the video. Supported by all Veo 3 models.",
                optional=True,
            ),
        ]
        return IO.Schema(
            node_id="Veo3VideoGenerationNode",
            display_name="Google Veo 3 Video Generation",
            category="api node/video/Veo",
            description="Generates videos from text prompts using Google's Veo 3 API",
            inputs=node_inputs,
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )
|
|
|
|
|
|
class Veo3FirstLastFrameNode(IO.ComfyNode):
    """Generate a video that transitions from a given first frame to a given last frame.

    Uses the Veo 3.1 models via the same generate/poll proxy endpoints as the
    other Veo nodes, but sends typed request objects (VeoRequestInstance et al.)
    including both boundary frames as inline base64 PNGs.
    """

    @classmethod
    def define_schema(cls):
        """Declare the node's UI schema (prompts, frames, and generation options)."""
        return IO.Schema(
            node_id="Veo3FirstLastFrameNode",
            display_name="Google Veo 3 First-Last-Frame to Video",
            category="api node/video/Veo",
            description="Generate video using prompt and first and last frames.",
            inputs=[
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text description of the video",
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative text prompt to guide what to avoid in the video",
                ),
                IO.Combo.Input("resolution", options=["720p", "1080p"]),
                IO.Combo.Input(
                    "aspect_ratio",
                    options=["16:9", "9:16"],
                    default="16:9",
                    tooltip="Aspect ratio of the output video",
                ),
                # Even durations 4-8s only (step=2), unlike the fixed 8s of the plain Veo 3 node.
                IO.Int.Input(
                    "duration",
                    default=8,
                    min=4,
                    max=8,
                    step=2,
                    display_mode=IO.NumberDisplay.slider,
                    tooltip="Duration of the output video in seconds",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=0xFFFFFFFF,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation",
                ),
                IO.Image.Input("first_frame", tooltip="Start frame"),
                IO.Image.Input("last_frame", tooltip="End frame"),
                IO.Combo.Input(
                    "model",
                    options=["veo-3.1-generate", "veo-3.1-fast-generate"],
                    default="veo-3.1-fast-generate",
                ),
                IO.Boolean.Input(
                    "generate_audio",
                    default=True,
                    tooltip="Generate audio for the video.",
                ),
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        negative_prompt: str,
        resolution: str,
        aspect_ratio: str,
        duration: int,
        seed: int,
        first_frame: Input.Image,
        last_frame: Input.Image,
        model: str,
        generate_audio: bool,
    ):
        """Submit a first/last-frame generation request and poll until the video is ready.

        Raises:
            Exception: On API errors, Responsible-AI filtering, or a missing
                video payload in the completed operation.
        """
        # Translate the UI-facing model name into the API identifier.
        model = MODELS_MAP[model]
        # Kick off the long-running generation; both frames travel inline as base64 PNGs.
        initial_response = await sync_op(
            cls,
            ApiEndpoint(path=f"/proxy/veo/{model}/generate", method="POST"),
            response_model=VeoGenVidResponse,
            data=VeoGenVidRequest(
                instances=[
                    VeoRequestInstance(
                        prompt=prompt,
                        image=VeoRequestInstanceImage(
                            bytesBase64Encoded=tensor_to_base64_string(first_frame), mimeType="image/png"
                        ),
                        lastFrame=VeoRequestInstanceImage(
                            bytesBase64Encoded=tensor_to_base64_string(last_frame), mimeType="image/png"
                        ),
                    ),
                ],
                parameters=VeoRequestParameters(
                    aspectRatio=aspect_ratio,
                    personGeneration="ALLOW",
                    durationSeconds=duration,
                    enhancePrompt=True,  # cannot be False for Veo3
                    seed=seed,
                    generateAudio=generate_audio,
                    negativePrompt=negative_prompt,
                    resolution=resolution,
                ),
            ),
        )
        # Poll until done; failures are still reported as "completed" here and
        # inspected below so we can raise a precise error message.
        poll_response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/veo/{model}/poll", method="POST"),
            response_model=VeoGenVidPollResponse,
            status_extractor=lambda r: "completed" if r.done else "pending",
            data=VeoGenVidPollRequest(
                operationName=initial_response.name,
            ),
            poll_interval=5.0,
            estimated_duration=AVERAGE_DURATION_VIDEO_GEN,
        )

        if poll_response.error:
            raise Exception(f"Veo API error: {poll_response.error.message} (code: {poll_response.error.code})")

        # NOTE(review): assumes poll_response.response is always set once the
        # operation is done without error — confirm against the API contract.
        response = poll_response.response
        filtered_count = response.raiMediaFilteredCount
        if filtered_count:
            # Report Responsible-AI filtering, quoting the first reason when available.
            reasons = response.raiMediaFilteredReasons or []
            reason_part = f": {reasons[0]}" if reasons else ""
            raise Exception(
                f"Content blocked by Google's Responsible AI filters{reason_part} "
                f"({filtered_count} video{'s' if filtered_count != 1 else ''} filtered)."
            )

        # The video may arrive inline (base64) or as a URL to download.
        if response.videos:
            video = response.videos[0]
            if video.bytesBase64Encoded:
                return IO.NodeOutput(InputImpl.VideoFromFile(BytesIO(base64.b64decode(video.bytesBase64Encoded))))
            if video.gcsUri:
                return IO.NodeOutput(await download_url_to_video_output(video.gcsUri))
            raise Exception("Video returned but no data or URL was provided")
        raise Exception("Video generation completed but no video was returned")
|
|
|
|
|
|
class VeoExtension(ComfyExtension):
    """Extension entry that registers the Google Veo API nodes with ComfyUI."""

    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        """Return every Veo node class exposed by this extension."""
        veo2_nodes: list[type[IO.ComfyNode]] = [VeoVideoGenerationNode]
        veo3_nodes: list[type[IO.ComfyNode]] = [
            Veo3VideoGenerationNode,
            Veo3FirstLastFrameNode,
        ]
        return veo2_nodes + veo3_nodes
|
|
|
|
|
|
async def comfy_entrypoint() -> VeoExtension:
    """Entry point called by ComfyUI to instantiate this extension."""
    extension = VeoExtension()
    return extension
|