ComfyUI/comfy_api_nodes/nodes_minimax.py
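"""API nodes for MiniMax video generation.

Wraps the MiniMax video-generation endpoints exposed through the Comfy API
proxy (`/proxy/minimax/...`): text-to-video, image-to-video, subject-to-video,
and the Hailuo-02 model. Each node submits a generation task, polls it until
it finishes, then downloads the resulting video.
"""
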
from typing import Optional
import torch
from typing_extensions import override
from comfy_api.latest import IO, ComfyExtension
from comfy_api_nodes.apis.minimax_api import (
    MinimaxFileRetrieveResponse,
    MiniMaxModel,
    MinimaxTaskResultResponse,
    MinimaxVideoGenerationRequest,
    MinimaxVideoGenerationResponse,
    SubjectReferenceItem,
)
from comfy_api_nodes.util import (
    ApiEndpoint,
    download_url_to_video_output,
    poll_op,
    sync_op,
    upload_images_to_comfyapi,
    validate_string,
)

I2V_AVERAGE_DURATION = 114
T2V_AVERAGE_DURATION = 234


async def _generate_mm_video(
    cls: type[IO.ComfyNode],
    *,
    prompt_text: str,
    seed: int,
    model: str,
    image: Optional[torch.Tensor] = None,  # used for ImageToVideo
    subject: Optional[torch.Tensor] = None,  # used for SubjectToVideo
    average_duration: Optional[int] = None,
) -> IO.NodeOutput:
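    """Shared implementation for the MiniMax video nodes.

    Submits a generation request to the MiniMax proxy, polls the task until it
    finishes, resolves the returned file ID to a download URL, and downloads
    the video. ``seed`` is not forwarded to the API; changing it only forces
    the node to re-execute.
    """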
    if image is None:
        validate_string(prompt_text, field_name="prompt_text")

    image_url = None
    if image is not None:
        image_url = (await upload_images_to_comfyapi(cls, image, max_images=1))[0]

    # TODO: figure out how to deal with subject properly, API returns invalid params when using S2V-01 model
    subject_reference = None
    if subject is not None:
        subject_url = (await upload_images_to_comfyapi(cls, subject, max_images=1))[0]
        subject_reference = [SubjectReferenceItem(image=subject_url)]

    response = await sync_op(
        cls,
        ApiEndpoint(path="/proxy/minimax/video_generation", method="POST"),
        response_model=MinimaxVideoGenerationResponse,
        data=MinimaxVideoGenerationRequest(
            model=MiniMaxModel(model),
            prompt=prompt_text,
            callback_url=None,
            first_frame_image=image_url,
            subject_reference=subject_reference,
            prompt_optimizer=None,
        ),
    )
    task_id = response.task_id
    if not task_id:
        raise Exception(f"MiniMax generation failed: {response.base_resp}")
    task_result = await poll_op(
        cls,
        ApiEndpoint(path="/proxy/minimax/query/video_generation", query_params={"task_id": task_id}),
        response_model=MinimaxTaskResultResponse,
        status_extractor=lambda x: x.status.value,
        estimated_duration=average_duration,
    )
    file_id = task_result.file_id
    if file_id is None:
        raise Exception("Request was not successful. Missing file ID.")

    file_result = await sync_op(
        cls,
        ApiEndpoint(path="/proxy/minimax/files/retrieve", query_params={"file_id": int(file_id)}),
        response_model=MinimaxFileRetrieveResponse,
    )
    file_url = file_result.file.download_url
    if file_url is None:
        raise Exception(f"No video was found in the response. Full response: {file_result.model_dump()}")
    if file_result.file.backup_download_url:
        try:
            return IO.NodeOutput(await download_url_to_video_output(file_url, timeout=10, max_retries=2))
        except Exception:  # if we have a second URL to retrieve the result, try again using that one
            return IO.NodeOutput(
                await download_url_to_video_output(file_result.file.backup_download_url, max_retries=3)
            )
    return IO.NodeOutput(await download_url_to_video_output(file_url))


class MinimaxTextToVideoNode(IO.ComfyNode):
    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="MinimaxTextToVideoNode",
            display_name="MiniMax Text to Video",
            category="api node/video/MiniMax",
            description="Generates videos synchronously based on a prompt and optional parameters.",
            inputs=[
                IO.String.Input(
                    "prompt_text",
                    multiline=True,
                    default="",
                    tooltip="Text prompt to guide the video generation",
                ),
                IO.Combo.Input(
                    "model",
                    options=["T2V-01", "T2V-01-Director"],
                    default="T2V-01",
                    tooltip="Model to use for video generation",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=0xFFFFFFFFFFFFFFFF,
                    step=1,
                    control_after_generate=True,
                    tooltip="The random seed used for creating the noise.",
                    optional=True,
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        prompt_text: str,
        model: str = "T2V-01",
        seed: int = 0,
    ) -> IO.NodeOutput:
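        """Run text-to-video through the shared MiniMax helper (no reference image)."""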
        return await _generate_mm_video(
            cls,
            prompt_text=prompt_text,
            seed=seed,
            model=model,
            image=None,
            subject=None,
            average_duration=T2V_AVERAGE_DURATION,
        )


class MinimaxImageToVideoNode(IO.ComfyNode):
    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="MinimaxImageToVideoNode",
            display_name="MiniMax Image to Video",
            category="api node/video/MiniMax",
            description="Generates videos synchronously based on an image, a prompt, and optional parameters.",
            inputs=[
                IO.Image.Input(
                    "image",
                    tooltip="Image to use as first frame of video generation",
                ),
                IO.String.Input(
                    "prompt_text",
                    multiline=True,
                    default="",
                    tooltip="Text prompt to guide the video generation",
                ),
                IO.Combo.Input(
                    "model",
                    options=["I2V-01-Director", "I2V-01", "I2V-01-live"],
                    default="I2V-01",
                    tooltip="Model to use for video generation",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=0xFFFFFFFFFFFFFFFF,
                    step=1,
                    control_after_generate=True,
                    tooltip="The random seed used for creating the noise.",
                    optional=True,
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        image: torch.Tensor,
        prompt_text: str,
        model: str = "I2V-01",
        seed: int = 0,
    ) -> IO.NodeOutput:
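        """Run image-to-video; the input image is uploaded and used as the video's first frame."""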
        return await _generate_mm_video(
            cls,
            prompt_text=prompt_text,
            seed=seed,
            model=model,
            image=image,
            subject=None,
            average_duration=I2V_AVERAGE_DURATION,
        )


class MinimaxSubjectToVideoNode(IO.ComfyNode):
    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="MinimaxSubjectToVideoNode",
            display_name="MiniMax Subject to Video",
            category="api node/video/MiniMax",
            description="Generates videos synchronously based on a subject reference image, a prompt, and optional parameters.",
            inputs=[
                IO.Image.Input(
                    "subject",
                    tooltip="Image of subject to reference for video generation",
                ),
                IO.String.Input(
                    "prompt_text",
                    multiline=True,
                    default="",
                    tooltip="Text prompt to guide the video generation",
                ),
                IO.Combo.Input(
                    "model",
                    options=["S2V-01"],
                    default="S2V-01",
                    tooltip="Model to use for video generation",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=0xFFFFFFFFFFFFFFFF,
                    step=1,
                    control_after_generate=True,
                    tooltip="The random seed used for creating the noise.",
                    optional=True,
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        subject: torch.Tensor,
        prompt_text: str,
        model: str = "S2V-01",
        seed: int = 0,
    ) -> IO.NodeOutput:
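        """Run subject-to-video; the subject image is uploaded as a subject reference."""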
        return await _generate_mm_video(
            cls,
            prompt_text=prompt_text,
            seed=seed,
            model=model,
            image=None,
            subject=subject,
            average_duration=T2V_AVERAGE_DURATION,
        )


class MinimaxHailuoVideoNode(IO.ComfyNode):
    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="MinimaxHailuoVideoNode",
            display_name="MiniMax Hailuo Video",
            category="api node/video/MiniMax",
            description="Generates videos from a prompt, with an optional start frame, using the MiniMax Hailuo-02 model.",
            inputs=[
                IO.String.Input(
                    "prompt_text",
                    multiline=True,
                    default="",
                    tooltip="Text prompt to guide the video generation.",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=0xFFFFFFFFFFFFFFFF,
                    step=1,
                    control_after_generate=True,
                    tooltip="The random seed used for creating the noise.",
                    optional=True,
                ),
                IO.Image.Input(
                    "first_frame_image",
                    tooltip="Optional image to use as the first frame to generate a video.",
                    optional=True,
                ),
                IO.Boolean.Input(
                    "prompt_optimizer",
                    default=True,
                    tooltip="Optimize prompt to improve generation quality when needed.",
                    optional=True,
                ),
                IO.Combo.Input(
                    "duration",
                    options=[6, 10],
                    default=6,
                    tooltip="The length of the output video in seconds.",
                    optional=True,
                ),
                IO.Combo.Input(
                    "resolution",
                    options=["768P", "1080P"],
                    default="768P",
                    tooltip="The dimensions of the video display. 1080P is 1920x1080, 768P is 1366x768.",
                    optional=True,
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        prompt_text: str,
        seed: int = 0,
        first_frame_image: Optional[torch.Tensor] = None,  # optional first frame of the video
        prompt_optimizer: bool = True,
        duration: int = 6,
        resolution: str = "768P",
        model: str = "MiniMax-Hailuo-02",  # not exposed in the schema, so always the default
    ) -> IO.NodeOutput:
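        """Generate a video with the MiniMax Hailuo-02 model.

        Validates the prompt when no first frame is supplied, rejects 1080P
        requests longer than 6 seconds, uploads the optional first frame,
        submits the generation task, polls it, and downloads the result.
        """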
        if first_frame_image is None:
            validate_string(prompt_text, field_name="prompt_text")
        if model == "MiniMax-Hailuo-02" and resolution.upper() == "1080P" and duration != 6:
            raise Exception(
                "When model is MiniMax-Hailuo-02 and resolution is 1080P, duration is limited to 6 seconds."
            )

        # upload image, if passed in
        image_url = None
        if first_frame_image is not None:
            image_url = (await upload_images_to_comfyapi(cls, first_frame_image, max_images=1))[0]

        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/minimax/video_generation", method="POST"),
            response_model=MinimaxVideoGenerationResponse,
            data=MinimaxVideoGenerationRequest(
                model=MiniMaxModel(model),
                prompt=prompt_text,
                callback_url=None,
                first_frame_image=image_url,
                prompt_optimizer=prompt_optimizer,
                duration=duration,
                resolution=resolution,
            ),
        )
        task_id = response.task_id
        if not task_id:
            raise Exception(f"MiniMax generation failed: {response.base_resp}")
        average_duration = 120 if resolution == "768P" else 240
        task_result = await poll_op(
            cls,
            ApiEndpoint(path="/proxy/minimax/query/video_generation", query_params={"task_id": task_id}),
            response_model=MinimaxTaskResultResponse,
            status_extractor=lambda x: x.status.value,
            estimated_duration=average_duration,
        )
        file_id = task_result.file_id
        if file_id is None:
            raise Exception("Request was not successful. Missing file ID.")

        file_result = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/minimax/files/retrieve", query_params={"file_id": int(file_id)}),
            response_model=MinimaxFileRetrieveResponse,
        )
        file_url = file_result.file.download_url
        if file_url is None:
            raise Exception(f"No video was found in the response. Full response: {file_result.model_dump()}")
        if file_result.file.backup_download_url:
            try:
                return IO.NodeOutput(await download_url_to_video_output(file_url, timeout=10, max_retries=2))
            except Exception:  # if we have a second URL to retrieve the result, try again using that one
                return IO.NodeOutput(
                    await download_url_to_video_output(file_result.file.backup_download_url, max_retries=3)
                )
        return IO.NodeOutput(await download_url_to_video_output(file_url))


class MinimaxExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        return [
            MinimaxTextToVideoNode,
            MinimaxImageToVideoNode,
            # MinimaxSubjectToVideoNode,  # disabled, see the S2V-01 TODO in _generate_mm_video
            MinimaxHailuoVideoNode,
        ]
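

# Entry point ComfyUI uses to load this extension.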
async def comfy_entrypoint() -> MinimaxExtension:
    return MinimaxExtension()