Merge remote-tracking branch 'upstream/master' into multitalk

kijai 2025-10-19 14:11:56 +03:00
commit 99dc95960a
57 changed files with 3210 additions and 1774 deletions

View File

@ -197,7 +197,9 @@ comfy install
## Manual Install (Windows, Linux)
Python 3.13 is very well supported. If you have trouble with some custom node dependencies, you can try 3.12
Python 3.14 will work if you comment out the `kornia` dependency in the requirements.txt file (this breaks the canny node), but it is not recommended.
Python 3.13 is very well supported. If you have trouble with some custom node dependencies on 3.13, you can try 3.12
Git clone this repo.
@ -253,7 +255,7 @@ This is the command to install the Pytorch xpu nightly which might have some per
Nvidia users should install stable pytorch using this command:
```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu129```
```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu130```
This is the command to install pytorch nightly instead, which might have performance improvements.

View File

@ -189,15 +189,15 @@ class ChromaRadiance(Chroma):
nerf_pixels = nn.functional.unfold(img_orig, kernel_size=patch_size, stride=patch_size)
nerf_pixels = nerf_pixels.transpose(1, 2) # -> [B, NumPatches, C * P * P]
# Reshape for per-patch processing
nerf_hidden = img_out.reshape(B * num_patches, params.hidden_size)
nerf_pixels = nerf_pixels.reshape(B * num_patches, C, patch_size**2).transpose(1, 2)
if params.nerf_tile_size > 0 and num_patches > params.nerf_tile_size:
# Enable tiling if nerf_tile_size isn't 0 and we actually have more patches than
# the tile size.
img_dct = self.forward_tiled_nerf(img_out, nerf_pixels, B, C, num_patches, patch_size, params)
img_dct = self.forward_tiled_nerf(nerf_hidden, nerf_pixels, B, C, num_patches, patch_size, params)
else:
# Reshape for per-patch processing
nerf_hidden = img_out.reshape(B * num_patches, params.hidden_size)
nerf_pixels = nerf_pixels.reshape(B * num_patches, C, patch_size**2).transpose(1, 2)
# Get DCT-encoded pixel embeddings [pixel-dct]
img_dct = self.nerf_image_embedder(nerf_pixels)
@ -240,17 +240,8 @@ class ChromaRadiance(Chroma):
end = min(i + tile_size, num_patches)
# Slice the current tile from the input tensors
nerf_hidden_tile = nerf_hidden[:, i:end, :]
nerf_pixels_tile = nerf_pixels[:, i:end, :]
# Get the actual number of patches in this tile (can be smaller for the last tile)
num_patches_tile = nerf_hidden_tile.shape[1]
# Reshape the tile for per-patch processing
# [B, NumPatches_tile, D] -> [B * NumPatches_tile, D]
nerf_hidden_tile = nerf_hidden_tile.reshape(batch * num_patches_tile, params.hidden_size)
# [B, NumPatches_tile, C*P*P] -> [B*NumPatches_tile, C, P*P] -> [B*NumPatches_tile, P*P, C]
nerf_pixels_tile = nerf_pixels_tile.reshape(batch * num_patches_tile, channels, patch_size**2).transpose(1, 2)
nerf_hidden_tile = nerf_hidden[i * batch:end * batch]
nerf_pixels_tile = nerf_pixels[i * batch:end * batch]
# get DCT-encoded pixel embeddings [pixel-dct]
img_dct_tile = self.nerf_image_embedder(nerf_pixels_tile)
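(Not part of this commit — a minimal sketch of the flattened-slice tiling the new code uses, assuming `nerf_hidden` has already been reshaped to `[B * NumPatches, D]` and `nerf_pixels` to `[B * NumPatches, P*P, C]` before the branch, as the lines above now do; `process_tile` is a hypothetical stand-in for the embedder plus NeRF head. Because every row is processed independently, slicing the flattened axis in blocks of `tile_size * batch` rows gives the same result as the old per-tile reshape, with less reshaping per iteration.)
```
import torch

def forward_tiled(nerf_hidden, nerf_pixels, batch, num_patches, tile_size, process_tile):
    # nerf_hidden: [B * NumPatches, D], nerf_pixels: [B * NumPatches, P*P, C]
    out = []
    for i in range(0, num_patches, tile_size):
        end = min(i + tile_size, num_patches)
        # Rows are independent, so a contiguous block of the flattened axis is a valid tile.
        hidden_tile = nerf_hidden[i * batch:end * batch]
        pixels_tile = nerf_pixels[i * batch:end * batch]
        out.append(process_tile(hidden_tile, pixels_tile))
    return torch.cat(out, dim=0)
```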

View File

View File

@ -0,0 +1,120 @@
# Implementation adapted from https://github.com/EdwardDixon/snake under the MIT license.
# LICENSE is in incl_licenses directory.
import torch
from torch import nn, sin, pow
from torch.nn import Parameter
import comfy.model_management
class Snake(nn.Module):
'''
Implementation of a sine-based periodic activation function
Shape:
- Input: (B, C, T)
- Output: (B, C, T), same shape as the input
Parameters:
- alpha - trainable parameter
References:
- This activation function is from this paper by Liu Ziyin, Tilman Hartwig, Masahito Ueda:
https://arxiv.org/abs/2006.08195
Examples:
>>> a1 = snake(256)
>>> x = torch.randn(256)
>>> x = a1(x)
'''
def __init__(self, in_features, alpha=1.0, alpha_trainable=True, alpha_logscale=False):
'''
Initialization.
INPUT:
- in_features: shape of the input
- alpha: trainable parameter
alpha is initialized to 1 by default, higher values = higher-frequency.
alpha will be trained along with the rest of your model.
'''
super(Snake, self).__init__()
self.in_features = in_features
# initialize alpha
self.alpha_logscale = alpha_logscale
if self.alpha_logscale:
self.alpha = Parameter(torch.empty(in_features))
else:
self.alpha = Parameter(torch.empty(in_features))
self.alpha.requires_grad = alpha_trainable
self.no_div_by_zero = 0.000000001
def forward(self, x):
'''
Forward pass of the function.
Applies the function to the input elementwise.
Snake = x + 1/a * sin^2 (xa)
'''
alpha = comfy.model_management.cast_to(self.alpha, dtype=x.dtype, device=x.device).unsqueeze(0).unsqueeze(-1) # line up with x to [B, C, T]
if self.alpha_logscale:
alpha = torch.exp(alpha)
x = x + (1.0 / (alpha + self.no_div_by_zero)) * pow(sin(x * alpha), 2)
return x
class SnakeBeta(nn.Module):
'''
A modified Snake function which uses separate parameters for the magnitude of the periodic components
Shape:
- Input: (B, C, T)
- Output: (B, C, T), same shape as the input
Parameters:
- alpha - trainable parameter that controls frequency
- beta - trainable parameter that controls magnitude
References:
- This activation function is a modified version based on this paper by Liu Ziyin, Tilman Hartwig, Masahito Ueda:
https://arxiv.org/abs/2006.08195
Examples:
>>> a1 = snakebeta(256)
>>> x = torch.randn(256)
>>> x = a1(x)
'''
def __init__(self, in_features, alpha=1.0, alpha_trainable=True, alpha_logscale=False):
'''
Initialization.
INPUT:
- in_features: shape of the input
- alpha - trainable parameter that controls frequency
- beta - trainable parameter that controls magnitude
alpha is initialized to 1 by default, higher values = higher-frequency.
beta is initialized to 1 by default, higher values = higher-magnitude.
alpha will be trained along with the rest of your model.
'''
super(SnakeBeta, self).__init__()
self.in_features = in_features
# initialize alpha
self.alpha_logscale = alpha_logscale
if self.alpha_logscale:
self.alpha = Parameter(torch.empty(in_features))
self.beta = Parameter(torch.empty(in_features))
else:
self.alpha = Parameter(torch.empty(in_features))
self.beta = Parameter(torch.empty(in_features))
self.alpha.requires_grad = alpha_trainable
self.beta.requires_grad = alpha_trainable
self.no_div_by_zero = 0.000000001
def forward(self, x):
'''
Forward pass of the function.
Applies the function to the input elementwise.
SnakeBeta = x + 1/b * sin^2 (xa)
'''
alpha = comfy.model_management.cast_to(self.alpha, dtype=x.dtype, device=x.device).unsqueeze(0).unsqueeze(-1) # line up with x to [B, C, T]
beta = comfy.model_management.cast_to(self.beta, dtype=x.dtype, device=x.device).unsqueeze(0).unsqueeze(-1)
if self.alpha_logscale:
alpha = torch.exp(alpha)
beta = torch.exp(beta)
x = x + (1.0 / (beta + self.no_div_by_zero)) * pow(sin(x * alpha), 2)
return x
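(Illustration only, not part of the diff: the docstrings above define Snake as x + (1/α)·sin²(αx) with a per-channel α broadcast over a [B, C, T] input. A standalone reference version and shape check:)
```
import torch

def snake_reference(x, alpha, eps=1e-9):
    # x: [B, C, T]; alpha: [C], broadcast to [1, C, 1] like the module above
    a = alpha.view(1, -1, 1)
    return x + (1.0 / (a + eps)) * torch.sin(x * a) ** 2

x = torch.randn(2, 4, 16)
out = snake_reference(x, torch.ones(4))
assert out.shape == x.shape  # same shape as the input, per the docstring
```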

View File

@ -0,0 +1,157 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import comfy.model_management
if 'sinc' in dir(torch):
sinc = torch.sinc
else:
# This code is adopted from adefossez's julius.core.sinc under the MIT License
# https://adefossez.github.io/julius/julius/core.html
# LICENSE is in incl_licenses directory.
def sinc(x: torch.Tensor):
"""
Implementation of sinc, i.e. sin(pi * x) / (pi * x)
__Warning__: Different to julius.sinc, the input is multiplied by `pi`!
"""
return torch.where(x == 0,
torch.tensor(1., device=x.device, dtype=x.dtype),
torch.sin(math.pi * x) / math.pi / x)
# This code is adopted from adefossez's julius.lowpass.LowPassFilters under the MIT License
# https://adefossez.github.io/julius/julius/lowpass.html
# LICENSE is in incl_licenses directory.
def kaiser_sinc_filter1d(cutoff, half_width, kernel_size): # return filter [1,1,kernel_size]
even = (kernel_size % 2 == 0)
half_size = kernel_size // 2
#For kaiser window
delta_f = 4 * half_width
A = 2.285 * (half_size - 1) * math.pi * delta_f + 7.95
if A > 50.:
beta = 0.1102 * (A - 8.7)
elif A >= 21.:
beta = 0.5842 * (A - 21)**0.4 + 0.07886 * (A - 21.)
else:
beta = 0.
window = torch.kaiser_window(kernel_size, beta=beta, periodic=False)
# ratio = 0.5/cutoff -> 2 * cutoff = 1 / ratio
if even:
time = (torch.arange(-half_size, half_size) + 0.5)
else:
time = torch.arange(kernel_size) - half_size
if cutoff == 0:
filter_ = torch.zeros_like(time)
else:
filter_ = 2 * cutoff * window * sinc(2 * cutoff * time)
# Normalize filter to have sum = 1, otherwise we will have a small leakage
# of the constant component in the input signal.
filter_ /= filter_.sum()
filter = filter_.view(1, 1, kernel_size)
return filter
class LowPassFilter1d(nn.Module):
def __init__(self,
cutoff=0.5,
half_width=0.6,
stride: int = 1,
padding: bool = True,
padding_mode: str = 'replicate',
kernel_size: int = 12):
# kernel_size should be even number for stylegan3 setup,
# in this implementation, odd number is also possible.
super().__init__()
if cutoff < -0.:
raise ValueError("Minimum cutoff must be larger than zero.")
if cutoff > 0.5:
raise ValueError("A cutoff above 0.5 does not make sense.")
self.kernel_size = kernel_size
self.even = (kernel_size % 2 == 0)
self.pad_left = kernel_size // 2 - int(self.even)
self.pad_right = kernel_size // 2
self.stride = stride
self.padding = padding
self.padding_mode = padding_mode
filter = kaiser_sinc_filter1d(cutoff, half_width, kernel_size)
self.register_buffer("filter", filter)
#input [B, C, T]
def forward(self, x):
_, C, _ = x.shape
if self.padding:
x = F.pad(x, (self.pad_left, self.pad_right),
mode=self.padding_mode)
out = F.conv1d(x, comfy.model_management.cast_to(self.filter.expand(C, -1, -1), dtype=x.dtype, device=x.device),
stride=self.stride, groups=C)
return out
class UpSample1d(nn.Module):
def __init__(self, ratio=2, kernel_size=None):
super().__init__()
self.ratio = ratio
self.kernel_size = int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size
self.stride = ratio
self.pad = self.kernel_size // ratio - 1
self.pad_left = self.pad * self.stride + (self.kernel_size - self.stride) // 2
self.pad_right = self.pad * self.stride + (self.kernel_size - self.stride + 1) // 2
filter = kaiser_sinc_filter1d(cutoff=0.5 / ratio,
half_width=0.6 / ratio,
kernel_size=self.kernel_size)
self.register_buffer("filter", filter)
# x: [B, C, T]
def forward(self, x):
_, C, _ = x.shape
x = F.pad(x, (self.pad, self.pad), mode='replicate')
x = self.ratio * F.conv_transpose1d(
x, comfy.model_management.cast_to(self.filter.expand(C, -1, -1), dtype=x.dtype, device=x.device), stride=self.stride, groups=C)
x = x[..., self.pad_left:-self.pad_right]
return x
class DownSample1d(nn.Module):
def __init__(self, ratio=2, kernel_size=None):
super().__init__()
self.ratio = ratio
self.kernel_size = int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size
self.lowpass = LowPassFilter1d(cutoff=0.5 / ratio,
half_width=0.6 / ratio,
stride=ratio,
kernel_size=self.kernel_size)
def forward(self, x):
xx = self.lowpass(x)
return xx
class Activation1d(nn.Module):
def __init__(self,
activation,
up_ratio: int = 2,
down_ratio: int = 2,
up_kernel_size: int = 12,
down_kernel_size: int = 12):
super().__init__()
self.up_ratio = up_ratio
self.down_ratio = down_ratio
self.act = activation
self.upsample = UpSample1d(up_ratio, up_kernel_size)
self.downsample = DownSample1d(down_ratio, down_kernel_size)
# x: [B,C,T]
def forward(self, x):
x = self.upsample(x)
x = self.act(x)
x = self.downsample(x)
return x
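(Usage sketch, not part of the commit: `Activation1d` wraps any pointwise activation in an upsample → activate → low-pass downsample chain, so the nonlinearity is applied at 2× the sample rate before a kaiser-sinc filter removes the aliased content. The import lines below assume the two new modules are importable as `activations` and `alias_free_torch`; the exact package path is not shown in this diff, and the Snake α here is uninitialized until checkpoint weights are loaded.)
```
import torch
from activations import Snake              # assumed module name, mirroring `from . import activations`
from alias_free_torch import Activation1d  # assumed module name, mirroring `from .alias_free_torch import ...`

act = Activation1d(activation=Snake(64), up_ratio=2, down_ratio=2)
x = torch.randn(1, 64, 1024)  # [B, C, T]
y = act(x)                    # upsample x2 -> Snake -> kaiser-sinc low-pass downsample x2
assert y.shape == x.shape     # matching up/down ratios restore the original length
```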

View File

@ -0,0 +1,156 @@
from typing import Literal
import torch
import torch.nn as nn
from .distributions import DiagonalGaussianDistribution
from .vae import VAE_16k
from .bigvgan import BigVGANVocoder
import logging
try:
import torchaudio
except:
logging.warning("torchaudio missing, MMAudio VAE model will be broken")
def dynamic_range_compression_torch(x, C=1, clip_val=1e-5, *, norm_fn):
return norm_fn(torch.clamp(x, min=clip_val) * C)
def spectral_normalize_torch(magnitudes, norm_fn):
output = dynamic_range_compression_torch(magnitudes, norm_fn=norm_fn)
return output
class MelConverter(nn.Module):
def __init__(
self,
*,
sampling_rate: float,
n_fft: int,
num_mels: int,
hop_size: int,
win_size: int,
fmin: float,
fmax: float,
norm_fn,
):
super().__init__()
self.sampling_rate = sampling_rate
self.n_fft = n_fft
self.num_mels = num_mels
self.hop_size = hop_size
self.win_size = win_size
self.fmin = fmin
self.fmax = fmax
self.norm_fn = norm_fn
# mel = librosa_mel_fn(sr=self.sampling_rate,
# n_fft=self.n_fft,
# n_mels=self.num_mels,
# fmin=self.fmin,
# fmax=self.fmax)
# mel_basis = torch.from_numpy(mel).float()
mel_basis = torch.empty((num_mels, 1 + n_fft // 2))
hann_window = torch.hann_window(self.win_size)
self.register_buffer('mel_basis', mel_basis)
self.register_buffer('hann_window', hann_window)
@property
def device(self):
return self.mel_basis.device
def forward(self, waveform: torch.Tensor, center: bool = False) -> torch.Tensor:
waveform = waveform.clamp(min=-1., max=1.).to(self.device)
waveform = torch.nn.functional.pad(
waveform.unsqueeze(1),
[int((self.n_fft - self.hop_size) / 2),
int((self.n_fft - self.hop_size) / 2)],
mode='reflect')
waveform = waveform.squeeze(1)
spec = torch.stft(waveform,
self.n_fft,
hop_length=self.hop_size,
win_length=self.win_size,
window=self.hann_window,
center=center,
pad_mode='reflect',
normalized=False,
onesided=True,
return_complex=True)
spec = torch.view_as_real(spec)
spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9))
spec = torch.matmul(self.mel_basis, spec)
spec = spectral_normalize_torch(spec, self.norm_fn)
return spec
class AudioAutoencoder(nn.Module):
def __init__(
self,
*,
# ckpt_path: str,
mode=Literal['16k', '44k'],
need_vae_encoder: bool = True,
):
super().__init__()
assert mode == "16k", "Only 16k mode is supported currently."
self.mel_converter = MelConverter(sampling_rate=16_000,
n_fft=1024,
num_mels=80,
hop_size=256,
win_size=1024,
fmin=0,
fmax=8_000,
norm_fn=torch.log10)
self.vae = VAE_16k().eval()
bigvgan_config = {
"resblock": "1",
"num_mels": 80,
"upsample_rates": [4, 4, 2, 2, 2, 2],
"upsample_kernel_sizes": [8, 8, 4, 4, 4, 4],
"upsample_initial_channel": 1536,
"resblock_kernel_sizes": [3, 7, 11],
"resblock_dilation_sizes": [
[1, 3, 5],
[1, 3, 5],
[1, 3, 5],
],
"activation": "snakebeta",
"snake_logscale": True,
}
self.vocoder = BigVGANVocoder(
bigvgan_config
).eval()
@torch.inference_mode()
def encode_audio(self, x) -> DiagonalGaussianDistribution:
# x: (B * L)
mel = self.mel_converter(x)
dist = self.vae.encode(mel)
return dist
@torch.no_grad()
def decode(self, z):
mel_decoded = self.vae.decode(z)
audio = self.vocoder(mel_decoded)
audio = torchaudio.functional.resample(audio, 16000, 44100)
return audio
@torch.no_grad()
def encode(self, audio):
audio = audio.mean(dim=1)
audio = torchaudio.functional.resample(audio, 44100, 16000)
dist = self.encode_audio(audio)
return dist.mean
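(Shape-level usage sketch, not part of the commit. The import path follows `import comfy.ldm.mmaudio.vae.autoencoder` later in this diff; the weights here are `torch.empty` placeholders, so the values are meaningless until a checkpoint is loaded, and `torchaudio` must be installed for the resampling steps.)
```
import torch
from comfy.ldm.mmaudio.vae.autoencoder import AudioAutoencoder

ae = AudioAutoencoder(mode='16k')
audio = torch.randn(1, 2, 44100)  # [B, channels, samples] at 44.1 kHz
latent = ae.encode(audio)         # mono mix -> resample to 16 kHz -> mel -> VAE posterior mean
recon = ae.decode(latent)         # VAE decode -> BigVGAN vocoder -> resample back to 44.1 kHz
print(latent.shape, recon.shape)
```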

View File

@ -0,0 +1,219 @@
# Copyright (c) 2022 NVIDIA CORPORATION.
# Licensed under the MIT license.
# Adapted from https://github.com/jik876/hifi-gan under the MIT license.
# LICENSE is in incl_licenses directory.
import torch
import torch.nn as nn
from types import SimpleNamespace
from . import activations
from .alias_free_torch import Activation1d
import comfy.ops
ops = comfy.ops.disable_weight_init
def get_padding(kernel_size, dilation=1):
return int((kernel_size * dilation - dilation) / 2)
class AMPBlock1(torch.nn.Module):
def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5), activation=None):
super(AMPBlock1, self).__init__()
self.h = h
self.convs1 = nn.ModuleList([
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=dilation[0],
padding=get_padding(kernel_size, dilation[0])),
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=dilation[1],
padding=get_padding(kernel_size, dilation[1])),
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=dilation[2],
padding=get_padding(kernel_size, dilation[2]))
])
self.convs2 = nn.ModuleList([
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=1,
padding=get_padding(kernel_size, 1)),
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=1,
padding=get_padding(kernel_size, 1)),
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=1,
padding=get_padding(kernel_size, 1))
])
self.num_layers = len(self.convs1) + len(self.convs2) # total number of conv layers
if activation == 'snake': # periodic nonlinearity with snake function and anti-aliasing
self.activations = nn.ModuleList([
Activation1d(
activation=activations.Snake(channels, alpha_logscale=h.snake_logscale))
for _ in range(self.num_layers)
])
elif activation == 'snakebeta': # periodic nonlinearity with snakebeta function and anti-aliasing
self.activations = nn.ModuleList([
Activation1d(
activation=activations.SnakeBeta(channels, alpha_logscale=h.snake_logscale))
for _ in range(self.num_layers)
])
else:
raise NotImplementedError(
"activation incorrectly specified. check the config file and look for 'activation'."
)
def forward(self, x):
acts1, acts2 = self.activations[::2], self.activations[1::2]
for c1, c2, a1, a2 in zip(self.convs1, self.convs2, acts1, acts2):
xt = a1(x)
xt = c1(xt)
xt = a2(xt)
xt = c2(xt)
x = xt + x
return x
class AMPBlock2(torch.nn.Module):
def __init__(self, h, channels, kernel_size=3, dilation=(1, 3), activation=None):
super(AMPBlock2, self).__init__()
self.h = h
self.convs = nn.ModuleList([
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=dilation[0],
padding=get_padding(kernel_size, dilation[0])),
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=dilation[1],
padding=get_padding(kernel_size, dilation[1]))
])
self.num_layers = len(self.convs) # total number of conv layers
if activation == 'snake': # periodic nonlinearity with snake function and anti-aliasing
self.activations = nn.ModuleList([
Activation1d(
activation=activations.Snake(channels, alpha_logscale=h.snake_logscale))
for _ in range(self.num_layers)
])
elif activation == 'snakebeta': # periodic nonlinearity with snakebeta function and anti-aliasing
self.activations = nn.ModuleList([
Activation1d(
activation=activations.SnakeBeta(channels, alpha_logscale=h.snake_logscale))
for _ in range(self.num_layers)
])
else:
raise NotImplementedError(
"activation incorrectly specified. check the config file and look for 'activation'."
)
def forward(self, x):
for c, a in zip(self.convs, self.activations):
xt = a(x)
xt = c(xt)
x = xt + x
return x
class BigVGANVocoder(torch.nn.Module):
# this is our main BigVGAN model. Applies anti-aliased periodic activation for resblocks.
def __init__(self, h):
super().__init__()
if isinstance(h, dict):
h = SimpleNamespace(**h)
self.h = h
self.num_kernels = len(h.resblock_kernel_sizes)
self.num_upsamples = len(h.upsample_rates)
# pre conv
self.conv_pre = ops.Conv1d(h.num_mels, h.upsample_initial_channel, 7, 1, padding=3)
# define which AMPBlock to use. BigVGAN uses AMPBlock1 as default
resblock = AMPBlock1 if h.resblock == '1' else AMPBlock2
# transposed conv-based upsamplers. does not apply anti-aliasing
self.ups = nn.ModuleList()
for i, (u, k) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)):
self.ups.append(
nn.ModuleList([
ops.ConvTranspose1d(h.upsample_initial_channel // (2**i),
h.upsample_initial_channel // (2**(i + 1)),
k,
u,
padding=(k - u) // 2)
]))
# residual blocks using anti-aliased multi-periodicity composition modules (AMP)
self.resblocks = nn.ModuleList()
for i in range(len(self.ups)):
ch = h.upsample_initial_channel // (2**(i + 1))
for j, (k, d) in enumerate(zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes)):
self.resblocks.append(resblock(h, ch, k, d, activation=h.activation))
# post conv
if h.activation == "snake": # periodic nonlinearity with snake function and anti-aliasing
activation_post = activations.Snake(ch, alpha_logscale=h.snake_logscale)
self.activation_post = Activation1d(activation=activation_post)
elif h.activation == "snakebeta": # periodic nonlinearity with snakebeta function and anti-aliasing
activation_post = activations.SnakeBeta(ch, alpha_logscale=h.snake_logscale)
self.activation_post = Activation1d(activation=activation_post)
else:
raise NotImplementedError(
"activation incorrectly specified. check the config file and look for 'activation'."
)
self.conv_post = ops.Conv1d(ch, 1, 7, 1, padding=3)
def forward(self, x):
# pre conv
x = self.conv_pre(x)
for i in range(self.num_upsamples):
# upsampling
for i_up in range(len(self.ups[i])):
x = self.ups[i][i_up](x)
# AMP blocks
xs = None
for j in range(self.num_kernels):
if xs is None:
xs = self.resblocks[i * self.num_kernels + j](x)
else:
xs += self.resblocks[i * self.num_kernels + j](x)
x = xs / self.num_kernels
# post conv
x = self.activation_post(x)
x = self.conv_post(x)
x = torch.tanh(x)
return x
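(Shape-level sketch, not part of the commit: with the config used by `AudioAutoencoder` above, the upsample rates multiply to 4·4·2·2·2·2 = 512, so an 80-bin mel of T frames becomes a mono waveform of 512·T samples. The import path mirrors `from .bigvgan import BigVGANVocoder` above; weights are uninitialized placeholders here.)
```
import torch
from comfy.ldm.mmaudio.vae.bigvgan import BigVGANVocoder  # path inferred from the relative import above

config = {
    "resblock": "1",
    "num_mels": 80,
    "upsample_rates": [4, 4, 2, 2, 2, 2],
    "upsample_kernel_sizes": [8, 8, 4, 4, 4, 4],
    "upsample_initial_channel": 1536,
    "resblock_kernel_sizes": [3, 7, 11],
    "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
    "activation": "snakebeta",
    "snake_logscale": True,
}
voc = BigVGANVocoder(config).eval()
mel = torch.randn(1, 80, 62)   # [B, num_mels, frames]
with torch.no_grad():
    wav = voc(mel)
print(wav.shape)               # [1, 1, 62 * 512]
```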

View File

@ -0,0 +1,92 @@
import torch
import numpy as np
class AbstractDistribution:
def sample(self):
raise NotImplementedError()
def mode(self):
raise NotImplementedError()
class DiracDistribution(AbstractDistribution):
def __init__(self, value):
self.value = value
def sample(self):
return self.value
def mode(self):
return self.value
class DiagonalGaussianDistribution(object):
def __init__(self, parameters, deterministic=False):
self.parameters = parameters
self.mean, self.logvar = torch.chunk(parameters, 2, dim=1)
self.logvar = torch.clamp(self.logvar, -30.0, 20.0)
self.deterministic = deterministic
self.std = torch.exp(0.5 * self.logvar)
self.var = torch.exp(self.logvar)
if self.deterministic:
self.var = self.std = torch.zeros_like(self.mean, device=self.parameters.device)
def sample(self):
x = self.mean + self.std * torch.randn(self.mean.shape, device=self.parameters.device)
return x
def kl(self, other=None):
if self.deterministic:
return torch.Tensor([0.])
else:
if other is None:
return 0.5 * torch.sum(torch.pow(self.mean, 2)
+ self.var - 1.0 - self.logvar,
dim=[1, 2, 3])
else:
return 0.5 * torch.sum(
torch.pow(self.mean - other.mean, 2) / other.var
+ self.var / other.var - 1.0 - self.logvar + other.logvar,
dim=[1, 2, 3])
def nll(self, sample, dims=[1,2,3]):
if self.deterministic:
return torch.Tensor([0.])
logtwopi = np.log(2.0 * np.pi)
return 0.5 * torch.sum(
logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var,
dim=dims)
def mode(self):
return self.mean
def normal_kl(mean1, logvar1, mean2, logvar2):
"""
source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12
Compute the KL divergence between two gaussians.
Shapes are automatically broadcasted, so batches can be compared to
scalars, among other use cases.
"""
tensor = None
for obj in (mean1, logvar1, mean2, logvar2):
if isinstance(obj, torch.Tensor):
tensor = obj
break
assert tensor is not None, "at least one argument must be a Tensor"
# Force variances to be Tensors. Broadcasting helps convert scalars to
# Tensors, but it does not work for torch.exp().
logvar1, logvar2 = [
x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor)
for x in (logvar1, logvar2)
]
return 0.5 * (
-1.0
+ logvar2
- logvar1
+ torch.exp(logvar1 - logvar2)
+ ((mean1 - mean2) ** 2) * torch.exp(-logvar2)
)
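(Standalone shape check, not part of the commit: the parameters tensor is split into mean and log-variance along dim 1, and `kl()` with no argument is the KL divergence to a standard normal, summed over dims [1, 2, 3]. The import path is inferred from the `from .distributions import ...` lines elsewhere in this diff.)
```
import torch
from comfy.ldm.mmaudio.vae.distributions import DiagonalGaussianDistribution

params = torch.randn(2, 40, 8, 8)   # [B, 2*C, ...]: mean and logvar stacked on dim 1
dist = DiagonalGaussianDistribution(params)
z = dist.sample()                   # [2, 20, 8, 8]
kl = dist.kl()                      # [2]; KL(q || N(0, I)) summed over dims [1, 2, 3]
print(z.shape, kl.shape)
```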

View File

@ -0,0 +1,358 @@
import logging
from typing import Optional
import torch
import torch.nn as nn
from .vae_modules import (AttnBlock1D, Downsample1D, ResnetBlock1D,
Upsample1D, nonlinearity)
from .distributions import DiagonalGaussianDistribution
import comfy.ops
ops = comfy.ops.disable_weight_init
log = logging.getLogger()
DATA_MEAN_80D = [
-1.6058, -1.3676, -1.2520, -1.2453, -1.2078, -1.2224, -1.2419, -1.2439, -1.2922, -1.2927,
-1.3170, -1.3543, -1.3401, -1.3836, -1.3907, -1.3912, -1.4313, -1.4152, -1.4527, -1.4728,
-1.4568, -1.5101, -1.5051, -1.5172, -1.5623, -1.5373, -1.5746, -1.5687, -1.6032, -1.6131,
-1.6081, -1.6331, -1.6489, -1.6489, -1.6700, -1.6738, -1.6953, -1.6969, -1.7048, -1.7280,
-1.7361, -1.7495, -1.7658, -1.7814, -1.7889, -1.8064, -1.8221, -1.8377, -1.8417, -1.8643,
-1.8857, -1.8929, -1.9173, -1.9379, -1.9531, -1.9673, -1.9824, -2.0042, -2.0215, -2.0436,
-2.0766, -2.1064, -2.1418, -2.1855, -2.2319, -2.2767, -2.3161, -2.3572, -2.3954, -2.4282,
-2.4659, -2.5072, -2.5552, -2.6074, -2.6584, -2.7107, -2.7634, -2.8266, -2.8981, -2.9673
]
DATA_STD_80D = [
1.0291, 1.0411, 1.0043, 0.9820, 0.9677, 0.9543, 0.9450, 0.9392, 0.9343, 0.9297, 0.9276, 0.9263,
0.9242, 0.9254, 0.9232, 0.9281, 0.9263, 0.9315, 0.9274, 0.9247, 0.9277, 0.9199, 0.9188, 0.9194,
0.9160, 0.9161, 0.9146, 0.9161, 0.9100, 0.9095, 0.9145, 0.9076, 0.9066, 0.9095, 0.9032, 0.9043,
0.9038, 0.9011, 0.9019, 0.9010, 0.8984, 0.8983, 0.8986, 0.8961, 0.8962, 0.8978, 0.8962, 0.8973,
0.8993, 0.8976, 0.8995, 0.9016, 0.8982, 0.8972, 0.8974, 0.8949, 0.8940, 0.8947, 0.8936, 0.8939,
0.8951, 0.8956, 0.9017, 0.9167, 0.9436, 0.9690, 1.0003, 1.0225, 1.0381, 1.0491, 1.0545, 1.0604,
1.0761, 1.0929, 1.1089, 1.1196, 1.1176, 1.1156, 1.1117, 1.1070
]
DATA_MEAN_128D = [
-3.3462, -2.6723, -2.4893, -2.3143, -2.2664, -2.3317, -2.1802, -2.4006, -2.2357, -2.4597,
-2.3717, -2.4690, -2.5142, -2.4919, -2.6610, -2.5047, -2.7483, -2.5926, -2.7462, -2.7033,
-2.7386, -2.8112, -2.7502, -2.9594, -2.7473, -3.0035, -2.8891, -2.9922, -2.9856, -3.0157,
-3.1191, -2.9893, -3.1718, -3.0745, -3.1879, -3.2310, -3.1424, -3.2296, -3.2791, -3.2782,
-3.2756, -3.3134, -3.3509, -3.3750, -3.3951, -3.3698, -3.4505, -3.4509, -3.5089, -3.4647,
-3.5536, -3.5788, -3.5867, -3.6036, -3.6400, -3.6747, -3.7072, -3.7279, -3.7283, -3.7795,
-3.8259, -3.8447, -3.8663, -3.9182, -3.9605, -3.9861, -4.0105, -4.0373, -4.0762, -4.1121,
-4.1488, -4.1874, -4.2461, -4.3170, -4.3639, -4.4452, -4.5282, -4.6297, -4.7019, -4.7960,
-4.8700, -4.9507, -5.0303, -5.0866, -5.1634, -5.2342, -5.3242, -5.4053, -5.4927, -5.5712,
-5.6464, -5.7052, -5.7619, -5.8410, -5.9188, -6.0103, -6.0955, -6.1673, -6.2362, -6.3120,
-6.3926, -6.4797, -6.5565, -6.6511, -6.8130, -6.9961, -7.1275, -7.2457, -7.3576, -7.4663,
-7.6136, -7.7469, -7.8815, -8.0132, -8.1515, -8.3071, -8.4722, -8.7418, -9.3975, -9.6628,
-9.7671, -9.8863, -9.9992, -10.0860, -10.1709, -10.5418, -11.2795, -11.3861
]
DATA_STD_128D = [
2.3804, 2.4368, 2.3772, 2.3145, 2.2803, 2.2510, 2.2316, 2.2083, 2.1996, 2.1835, 2.1769, 2.1659,
2.1631, 2.1618, 2.1540, 2.1606, 2.1571, 2.1567, 2.1612, 2.1579, 2.1679, 2.1683, 2.1634, 2.1557,
2.1668, 2.1518, 2.1415, 2.1449, 2.1406, 2.1350, 2.1313, 2.1415, 2.1281, 2.1352, 2.1219, 2.1182,
2.1327, 2.1195, 2.1137, 2.1080, 2.1179, 2.1036, 2.1087, 2.1036, 2.1015, 2.1068, 2.0975, 2.0991,
2.0902, 2.1015, 2.0857, 2.0920, 2.0893, 2.0897, 2.0910, 2.0881, 2.0925, 2.0873, 2.0960, 2.0900,
2.0957, 2.0958, 2.0978, 2.0936, 2.0886, 2.0905, 2.0845, 2.0855, 2.0796, 2.0840, 2.0813, 2.0817,
2.0838, 2.0840, 2.0917, 2.1061, 2.1431, 2.1976, 2.2482, 2.3055, 2.3700, 2.4088, 2.4372, 2.4609,
2.4731, 2.4847, 2.5072, 2.5451, 2.5772, 2.6147, 2.6529, 2.6596, 2.6645, 2.6726, 2.6803, 2.6812,
2.6899, 2.6916, 2.6931, 2.6998, 2.7062, 2.7262, 2.7222, 2.7158, 2.7041, 2.7485, 2.7491, 2.7451,
2.7485, 2.7233, 2.7297, 2.7233, 2.7145, 2.6958, 2.6788, 2.6439, 2.6007, 2.4786, 2.2469, 2.1877,
2.1392, 2.0717, 2.0107, 1.9676, 1.9140, 1.7102, 0.9101, 0.7164
]
class VAE(nn.Module):
def __init__(
self,
*,
data_dim: int,
embed_dim: int,
hidden_dim: int,
):
super().__init__()
if data_dim == 80:
self.data_mean = nn.Buffer(torch.tensor(DATA_MEAN_80D, dtype=torch.float32))
self.data_std = nn.Buffer(torch.tensor(DATA_STD_80D, dtype=torch.float32))
elif data_dim == 128:
self.data_mean = nn.Buffer(torch.tensor(DATA_MEAN_128D, dtype=torch.float32))
self.data_std = nn.Buffer(torch.tensor(DATA_STD_128D, dtype=torch.float32))
self.data_mean = self.data_mean.view(1, -1, 1)
self.data_std = self.data_std.view(1, -1, 1)
self.encoder = Encoder1D(
dim=hidden_dim,
ch_mult=(1, 2, 4),
num_res_blocks=2,
attn_layers=[3],
down_layers=[0],
in_dim=data_dim,
embed_dim=embed_dim,
)
self.decoder = Decoder1D(
dim=hidden_dim,
ch_mult=(1, 2, 4),
num_res_blocks=2,
attn_layers=[3],
down_layers=[0],
in_dim=data_dim,
out_dim=data_dim,
embed_dim=embed_dim,
)
self.embed_dim = embed_dim
# self.quant_conv = nn.Conv1d(2 * embed_dim, 2 * embed_dim, 1)
# self.post_quant_conv = nn.Conv1d(embed_dim, embed_dim, 1)
self.initialize_weights()
def initialize_weights(self):
pass
def encode(self, x: torch.Tensor, normalize: bool = True) -> DiagonalGaussianDistribution:
if normalize:
x = self.normalize(x)
moments = self.encoder(x)
posterior = DiagonalGaussianDistribution(moments)
return posterior
def decode(self, z: torch.Tensor, unnormalize: bool = True) -> torch.Tensor:
dec = self.decoder(z)
if unnormalize:
dec = self.unnormalize(dec)
return dec
def normalize(self, x: torch.Tensor) -> torch.Tensor:
return (x - comfy.model_management.cast_to(self.data_mean, dtype=x.dtype, device=x.device)) / comfy.model_management.cast_to(self.data_std, dtype=x.dtype, device=x.device)
def unnormalize(self, x: torch.Tensor) -> torch.Tensor:
return x * comfy.model_management.cast_to(self.data_std, dtype=x.dtype, device=x.device) + comfy.model_management.cast_to(self.data_mean, dtype=x.dtype, device=x.device)
def forward(
self,
x: torch.Tensor,
sample_posterior: bool = True,
rng: Optional[torch.Generator] = None,
normalize: bool = True,
unnormalize: bool = True,
) -> tuple[torch.Tensor, DiagonalGaussianDistribution]:
posterior = self.encode(x, normalize=normalize)
if sample_posterior:
z = posterior.sample(rng)
else:
z = posterior.mode()
dec = self.decode(z, unnormalize=unnormalize)
return dec, posterior
def load_weights(self, src_dict) -> None:
self.load_state_dict(src_dict, strict=True)
@property
def device(self) -> torch.device:
return next(self.parameters()).device
def get_last_layer(self):
return self.decoder.conv_out.weight
def remove_weight_norm(self):
return self
class Encoder1D(nn.Module):
def __init__(self,
*,
dim: int,
ch_mult: tuple[int] = (1, 2, 4, 8),
num_res_blocks: int,
attn_layers: list[int] = [],
down_layers: list[int] = [],
resamp_with_conv: bool = True,
in_dim: int,
embed_dim: int,
double_z: bool = True,
kernel_size: int = 3,
clip_act: float = 256.0):
super().__init__()
self.dim = dim
self.num_layers = len(ch_mult)
self.num_res_blocks = num_res_blocks
self.in_channels = in_dim
self.clip_act = clip_act
self.down_layers = down_layers
self.attn_layers = attn_layers
self.conv_in = ops.Conv1d(in_dim, self.dim, kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
in_ch_mult = (1, ) + tuple(ch_mult)
self.in_ch_mult = in_ch_mult
# downsampling
self.down = nn.ModuleList()
for i_level in range(self.num_layers):
block = nn.ModuleList()
attn = nn.ModuleList()
block_in = dim * in_ch_mult[i_level]
block_out = dim * ch_mult[i_level]
for i_block in range(self.num_res_blocks):
block.append(
ResnetBlock1D(in_dim=block_in,
out_dim=block_out,
kernel_size=kernel_size,
use_norm=True))
block_in = block_out
if i_level in attn_layers:
attn.append(AttnBlock1D(block_in))
down = nn.Module()
down.block = block
down.attn = attn
if i_level in down_layers:
down.downsample = Downsample1D(block_in, resamp_with_conv)
self.down.append(down)
# middle
self.mid = nn.Module()
self.mid.block_1 = ResnetBlock1D(in_dim=block_in,
out_dim=block_in,
kernel_size=kernel_size,
use_norm=True)
self.mid.attn_1 = AttnBlock1D(block_in)
self.mid.block_2 = ResnetBlock1D(in_dim=block_in,
out_dim=block_in,
kernel_size=kernel_size,
use_norm=True)
# end
self.conv_out = ops.Conv1d(block_in,
2 * embed_dim if double_z else embed_dim,
kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
self.learnable_gain = nn.Parameter(torch.zeros([]))
def forward(self, x):
# downsampling
h = self.conv_in(x)
for i_level in range(self.num_layers):
for i_block in range(self.num_res_blocks):
h = self.down[i_level].block[i_block](h)
if len(self.down[i_level].attn) > 0:
h = self.down[i_level].attn[i_block](h)
h = h.clamp(-self.clip_act, self.clip_act)
if i_level in self.down_layers:
h = self.down[i_level].downsample(h)
# middle
h = self.mid.block_1(h)
h = self.mid.attn_1(h)
h = self.mid.block_2(h)
h = h.clamp(-self.clip_act, self.clip_act)
# end
h = nonlinearity(h)
h = self.conv_out(h) * (self.learnable_gain + 1)
return h
class Decoder1D(nn.Module):
def __init__(self,
*,
dim: int,
out_dim: int,
ch_mult: tuple[int] = (1, 2, 4, 8),
num_res_blocks: int,
attn_layers: list[int] = [],
down_layers: list[int] = [],
kernel_size: int = 3,
resamp_with_conv: bool = True,
in_dim: int,
embed_dim: int,
clip_act: float = 256.0):
super().__init__()
self.ch = dim
self.num_layers = len(ch_mult)
self.num_res_blocks = num_res_blocks
self.in_channels = in_dim
self.clip_act = clip_act
self.down_layers = [i + 1 for i in down_layers] # each downlayer add one
# compute in_ch_mult, block_in and curr_res at lowest res
block_in = dim * ch_mult[self.num_layers - 1]
# z to block_in
self.conv_in = ops.Conv1d(embed_dim, block_in, kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
# middle
self.mid = nn.Module()
self.mid.block_1 = ResnetBlock1D(in_dim=block_in, out_dim=block_in, use_norm=True)
self.mid.attn_1 = AttnBlock1D(block_in)
self.mid.block_2 = ResnetBlock1D(in_dim=block_in, out_dim=block_in, use_norm=True)
# upsampling
self.up = nn.ModuleList()
for i_level in reversed(range(self.num_layers)):
block = nn.ModuleList()
attn = nn.ModuleList()
block_out = dim * ch_mult[i_level]
for i_block in range(self.num_res_blocks + 1):
block.append(ResnetBlock1D(in_dim=block_in, out_dim=block_out, use_norm=True))
block_in = block_out
if i_level in attn_layers:
attn.append(AttnBlock1D(block_in))
up = nn.Module()
up.block = block
up.attn = attn
if i_level in self.down_layers:
up.upsample = Upsample1D(block_in, resamp_with_conv)
self.up.insert(0, up) # prepend to get consistent order
# end
self.conv_out = ops.Conv1d(block_in, out_dim, kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
self.learnable_gain = nn.Parameter(torch.zeros([]))
def forward(self, z):
# z to block_in
h = self.conv_in(z)
# middle
h = self.mid.block_1(h)
h = self.mid.attn_1(h)
h = self.mid.block_2(h)
h = h.clamp(-self.clip_act, self.clip_act)
# upsampling
for i_level in reversed(range(self.num_layers)):
for i_block in range(self.num_res_blocks + 1):
h = self.up[i_level].block[i_block](h)
if len(self.up[i_level].attn) > 0:
h = self.up[i_level].attn[i_block](h)
h = h.clamp(-self.clip_act, self.clip_act)
if i_level in self.down_layers:
h = self.up[i_level].upsample(h)
h = nonlinearity(h)
h = self.conv_out(h) * (self.learnable_gain + 1)
return h
def VAE_16k(**kwargs) -> VAE:
return VAE(data_dim=80, embed_dim=20, hidden_dim=384, **kwargs)
def VAE_44k(**kwargs) -> VAE:
return VAE(data_dim=128, embed_dim=40, hidden_dim=512, **kwargs)
def get_my_vae(name: str, **kwargs) -> VAE:
if name == '16k':
return VAE_16k(**kwargs)
if name == '44k':
return VAE_44k(**kwargs)
raise ValueError(f'Unknown model: {name}')
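(Shape-level sketch, not part of the commit: `VAE_16k` maps an 80-bin mel spectrogram to a 20-channel latent at half the frame rate — ch_mult (1, 2, 4) with a single downsampling layer — and decodes back to 80 bins. The module path is inferred from `from .vae import VAE_16k` above; weights are placeholders until a checkpoint is loaded.)
```
import torch
from comfy.ldm.mmaudio.vae.vae import VAE_16k

vae = VAE_16k().eval()
mel = torch.randn(1, 80, 62)            # [B, n_mels, frames]
posterior = vae.encode(mel)             # DiagonalGaussianDistribution over [1, 20, 31]
recon = vae.decode(posterior.mode())    # back to [1, 80, 62]
print(posterior.mode().shape, recon.shape)
```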

View File

@ -0,0 +1,121 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from comfy.ldm.modules.diffusionmodules.model import vae_attention
import math
import comfy.ops
ops = comfy.ops.disable_weight_init
def nonlinearity(x):
# swish
return torch.nn.functional.silu(x) / 0.596
def mp_sum(a, b, t=0.5):
return a.lerp(b, t) / math.sqrt((1 - t)**2 + t**2)
def normalize(x, dim=None, eps=1e-4):
if dim is None:
dim = list(range(1, x.ndim))
norm = torch.linalg.vector_norm(x, dim=dim, keepdim=True, dtype=torch.float32)
norm = torch.add(eps, norm, alpha=math.sqrt(norm.numel() / x.numel()))
return x / norm.to(x.dtype)
class ResnetBlock1D(nn.Module):
def __init__(self, *, in_dim, out_dim=None, conv_shortcut=False, kernel_size=3, use_norm=True):
super().__init__()
self.in_dim = in_dim
out_dim = in_dim if out_dim is None else out_dim
self.out_dim = out_dim
self.use_conv_shortcut = conv_shortcut
self.use_norm = use_norm
self.conv1 = ops.Conv1d(in_dim, out_dim, kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
self.conv2 = ops.Conv1d(out_dim, out_dim, kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
if self.in_dim != self.out_dim:
if self.use_conv_shortcut:
self.conv_shortcut = ops.Conv1d(in_dim, out_dim, kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
else:
self.nin_shortcut = ops.Conv1d(in_dim, out_dim, kernel_size=1, padding=0, bias=False)
def forward(self, x: torch.Tensor) -> torch.Tensor:
# pixel norm
if self.use_norm:
x = normalize(x, dim=1)
h = x
h = nonlinearity(h)
h = self.conv1(h)
h = nonlinearity(h)
h = self.conv2(h)
if self.in_dim != self.out_dim:
if self.use_conv_shortcut:
x = self.conv_shortcut(x)
else:
x = self.nin_shortcut(x)
return mp_sum(x, h, t=0.3)
class AttnBlock1D(nn.Module):
def __init__(self, in_channels, num_heads=1):
super().__init__()
self.in_channels = in_channels
self.num_heads = num_heads
self.qkv = ops.Conv1d(in_channels, in_channels * 3, kernel_size=1, padding=0, bias=False)
self.proj_out = ops.Conv1d(in_channels, in_channels, kernel_size=1, padding=0, bias=False)
self.optimized_attention = vae_attention()
def forward(self, x):
h = x
y = self.qkv(h)
y = y.reshape(y.shape[0], -1, 3, y.shape[-1])
q, k, v = normalize(y, dim=1).unbind(2)
h = self.optimized_attention(q, k, v)
h = self.proj_out(h)
return mp_sum(x, h, t=0.3)
class Upsample1D(nn.Module):
def __init__(self, in_channels, with_conv):
super().__init__()
self.with_conv = with_conv
if self.with_conv:
self.conv = ops.Conv1d(in_channels, in_channels, kernel_size=3, padding=1, bias=False)
def forward(self, x):
x = F.interpolate(x, scale_factor=2.0, mode='nearest-exact') # support 3D tensor(B,C,T)
if self.with_conv:
x = self.conv(x)
return x
class Downsample1D(nn.Module):
def __init__(self, in_channels, with_conv):
super().__init__()
self.with_conv = with_conv
if self.with_conv:
# no asymmetric padding in torch conv, must do it ourselves
self.conv1 = ops.Conv1d(in_channels, in_channels, kernel_size=1, padding=0, bias=False)
self.conv2 = ops.Conv1d(in_channels, in_channels, kernel_size=1, padding=0, bias=False)
def forward(self, x):
if self.with_conv:
x = self.conv1(x)
x = F.avg_pool1d(x, kernel_size=2, stride=2)
if self.with_conv:
x = self.conv2(x)
return x
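(Standalone check, not part of the commit: `mp_sum` is a magnitude-preserving blend — `a.lerp(b, t)` equals (1−t)·a + t·b, and dividing by √((1−t)² + t²) keeps the output variance at 1 when `a` and `b` are independent unit-variance tensors, which is why the residual adds above use it instead of a plain `x + h`.)
```
import math
import torch

def mp_sum(a, b, t=0.5):
    # same definition as above
    return a.lerp(b, t) / math.sqrt((1 - t) ** 2 + t ** 2)

a, b = torch.randn(1_000_000), torch.randn(1_000_000)
print(mp_sum(a, b, t=0.3).std())  # ~1.0: the blend preserves unit variance
```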

View File

@ -657,51 +657,51 @@ class WanVAE(nn.Module):
)
def encode(self, x):
self.clear_cache()
conv_idx = [0]
feat_map = [None] * count_conv3d(self.encoder)
x = patchify(x, patch_size=2)
t = x.shape[2]
iter_ = 1 + (t - 1) // 4
for i in range(iter_):
self._enc_conv_idx = [0]
conv_idx = [0]
if i == 0:
out = self.encoder(
x[:, :, :1, :, :],
feat_cache=self._enc_feat_map,
feat_idx=self._enc_conv_idx,
feat_cache=feat_map,
feat_idx=conv_idx,
)
else:
out_ = self.encoder(
x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :],
feat_cache=self._enc_feat_map,
feat_idx=self._enc_conv_idx,
feat_cache=feat_map,
feat_idx=conv_idx,
)
out = torch.cat([out, out_], 2)
mu, log_var = self.conv1(out).chunk(2, dim=1)
self.clear_cache()
return mu
def decode(self, z):
self.clear_cache()
conv_idx = [0]
feat_map = [None] * count_conv3d(self.decoder)
iter_ = z.shape[2]
x = self.conv2(z)
for i in range(iter_):
self._conv_idx = [0]
conv_idx = [0]
if i == 0:
out = self.decoder(
x[:, :, i:i + 1, :, :],
feat_cache=self._feat_map,
feat_idx=self._conv_idx,
feat_cache=feat_map,
feat_idx=conv_idx,
first_chunk=True,
)
else:
out_ = self.decoder(
x[:, :, i:i + 1, :, :],
feat_cache=self._feat_map,
feat_idx=self._conv_idx,
feat_cache=feat_map,
feat_idx=conv_idx,
)
out = torch.cat([out, out_], 2)
out = unpatchify(out, patch_size=2)
self.clear_cache()
return out
def reparameterize(self, mu, log_var):
@ -715,12 +715,3 @@ class WanVAE(nn.Module):
return mu
std = torch.exp(0.5 * log_var.clamp(-30.0, 20.0))
return mu + std * torch.randn_like(std)
def clear_cache(self):
self._conv_num = count_conv3d(self.decoder)
self._conv_idx = [0]
self._feat_map = [None] * self._conv_num
# cache encode
self._enc_conv_num = count_conv3d(self.encoder)
self._enc_conv_idx = [0]
self._enc_feat_map = [None] * self._enc_conv_num
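(Illustration only: the change above replaces the persistent `self._feat_map` / `self._conv_idx` caches and `clear_cache()` with locals created per call, so no cache state survives between `encode`/`decode` invocations. A hypothetical reduction of that pattern — `encoder` and `count_cached_convs` are stand-ins, not the repo API:)
```
def encode_in_chunks(encoder, chunks, count_cached_convs):
    feat_map = [None] * count_cached_convs  # local per call; nothing persists on the module
    outputs = []
    for chunk in chunks:
        conv_idx = [0]                      # reset for every chunk, advanced inside the encoder
        outputs.append(encoder(chunk, feat_cache=feat_map, feat_idx=conv_idx))
    return outputs
```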

View File

@ -138,6 +138,7 @@ class BaseModel(torch.nn.Module):
else:
operations = model_config.custom_operations
self.diffusion_model = unet_model(**unet_config, device=device, operations=operations)
self.diffusion_model.eval()
if comfy.model_management.force_channels_last():
self.diffusion_model.to(memory_format=torch.channels_last)
logging.debug("using channels last mode for diffusion model")
@ -669,7 +670,6 @@ class Lotus(BaseModel):
class StableCascade_C(BaseModel):
def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None):
super().__init__(model_config, model_type, device=device, unet_model=StageC)
self.diffusion_model.eval().requires_grad_(False)
def extra_conds(self, **kwargs):
out = {}
@ -698,7 +698,6 @@ class StableCascade_C(BaseModel):
class StableCascade_B(BaseModel):
def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None):
super().__init__(model_config, model_type, device=device, unet_model=StageB)
self.diffusion_model.eval().requires_grad_(False)
def extra_conds(self, **kwargs):
out = {}

View File

@ -213,7 +213,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
dit_config["nerf_mlp_ratio"] = 4
dit_config["nerf_depth"] = 4
dit_config["nerf_max_freqs"] = 8
dit_config["nerf_tile_size"] = 32
dit_config["nerf_tile_size"] = 512
dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
dit_config["nerf_embedder_dtype"] = torch.float32
else:

View File

@ -332,6 +332,7 @@ except:
SUPPORT_FP8_OPS = args.supports_fp8_compute
try:
if is_amd():
torch.backends.cudnn.enabled = False # Seems to improve things a lot on AMD
try:
rocm_version = tuple(map(int, str(torch.version.hip).split(".")[:2]))
except:
@ -344,11 +345,11 @@ try:
if torch_version_numeric >= (2, 7): # works on 2.6 but doesn't actually seem to improve much
if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]): # TODO: more arches, TODO: gfx950
ENABLE_PYTORCH_ATTENTION = True
# if torch_version_numeric >= (2, 8):
# if any((a in arch) for a in ["gfx1201"]):
# ENABLE_PYTORCH_ATTENTION = True
if rocm_version >= (7, 0):
if any((a in arch) for a in ["gfx1201"]):
ENABLE_PYTORCH_ATTENTION = True
if torch_version_numeric >= (2, 7) and rocm_version >= (6, 4):
if any((a in arch) for a in ["gfx1200", "gfx1201", "gfx942", "gfx950"]): # TODO: more arches
if any((a in arch) for a in ["gfx1200", "gfx1201", "gfx950"]): # TODO: more arches, "gfx942" gives error on pytorch nightly 2.10 1013 rocm7.0
SUPPORT_FP8_OPS = True
except:
@ -370,6 +371,9 @@ try:
except:
pass
if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
torch.backends.cudnn.benchmark = True
try:
if torch_version_numeric >= (2, 5):
torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)
@ -925,11 +929,7 @@ def vae_dtype(device=None, allowed_dtypes=[]):
if d == torch.float16 and should_use_fp16(device):
return d
# NOTE: bfloat16 seems to work on AMD for the VAE but is extremely slow in some cases compared to fp32
# slowness still a problem on pytorch nightly 2.9.0.dev20250720+rocm6.4 tested on RDNA3
# also a problem on RDNA4 except fp32 is also slow there.
# This is due to large bf16 convolutions being extremely slow.
if d == torch.bfloat16 and ((not is_amd()) or amd_min_version(device, min_rdna_version=4)) and should_use_bf16(device):
if d == torch.bfloat16 and should_use_bf16(device):
return d
return torch.float32

View File

@ -24,6 +24,11 @@ import comfy.float
import comfy.rmsnorm
import contextlib
def run_every_op():
if torch.compiler.is_compiling():
return
comfy.model_management.throw_exception_if_processing_interrupted()
def scaled_dot_product_attention(q, k, v, *args, **kwargs):
return torch.nn.functional.scaled_dot_product_attention(q, k, v, *args, **kwargs)
@ -50,14 +55,22 @@ try:
except (ModuleNotFoundError, TypeError):
logging.warning("Could not set sdpa backend priority.")
cast_to = comfy.model_management.cast_to #TODO: remove once no more references
NVIDIA_MEMORY_CONV_BUG_WORKAROUND = False
try:
if comfy.model_management.is_nvidia():
if torch.backends.cudnn.version() >= 91002 and comfy.model_management.torch_version_numeric >= (2, 9) and comfy.model_management.torch_version_numeric <= (2, 10):
#TODO: change upper bound version once it's fixed
NVIDIA_MEMORY_CONV_BUG_WORKAROUND = True
logging.info("working around nvidia conv3d memory bug.")
except:
pass
if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
torch.backends.cudnn.benchmark = True
cast_to = comfy.model_management.cast_to #TODO: remove once no more references
def cast_to_input(weight, input, non_blocking=False, copy=True):
return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)
@torch.compiler.disable()
def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
if input is not None:
if dtype is None:
@ -109,6 +122,7 @@ class disable_weight_init:
return torch.nn.functional.linear(input, weight, bias)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -123,6 +137,7 @@ class disable_weight_init:
return self._conv_forward(input, weight, bias)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -137,6 +152,7 @@ class disable_weight_init:
return self._conv_forward(input, weight, bias)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -146,11 +162,21 @@ class disable_weight_init:
def reset_parameters(self):
return None
def _conv_forward(self, input, weight, bias, *args, **kwargs):
if NVIDIA_MEMORY_CONV_BUG_WORKAROUND and weight.dtype in (torch.float16, torch.bfloat16):
out = torch.cudnn_convolution(input, weight, self.padding, self.stride, self.dilation, self.groups, benchmark=False, deterministic=False, allow_tf32=True)
if bias is not None:
out += bias.reshape((1, -1) + (1,) * (out.ndim - 2))
return out
else:
return super()._conv_forward(input, weight, bias, *args, **kwargs)
def forward_comfy_cast_weights(self, input):
weight, bias = cast_bias_weight(self, input)
return self._conv_forward(input, weight, bias)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -165,6 +191,7 @@ class disable_weight_init:
return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -183,6 +210,7 @@ class disable_weight_init:
return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -202,6 +230,7 @@ class disable_weight_init:
# return torch.nn.functional.rms_norm(input, self.normalized_shape, weight, self.eps)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -223,6 +252,7 @@ class disable_weight_init:
output_padding, self.groups, self.dilation)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -244,6 +274,7 @@ class disable_weight_init:
output_padding, self.groups, self.dilation)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -262,6 +293,7 @@ class disable_weight_init:
return torch.nn.functional.embedding(input, weight, self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse).to(dtype=output_dtype)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:

View File

@ -150,7 +150,7 @@ def merge_nested_dicts(dict1: dict, dict2: dict, copy_dict1=True):
for key, value in dict2.items():
if isinstance(value, dict):
curr_value = merged_dict.setdefault(key, {})
merged_dict[key] = merge_nested_dicts(value, curr_value)
merged_dict[key] = merge_nested_dicts(curr_value, value)
elif isinstance(value, list):
merged_dict.setdefault(key, []).extend(value)
else:

View File

@ -306,17 +306,10 @@ def _calc_cond_batch(model: BaseModel, conds: list[list[dict]], x_in: torch.Tens
copy_dict1=False)
if patches is not None:
# TODO: replace with merge_nested_dicts function
if "patches" in transformer_options:
cur_patches = transformer_options["patches"].copy()
for p in patches:
if p in cur_patches:
cur_patches[p] = cur_patches[p] + patches[p]
else:
cur_patches[p] = patches[p]
transformer_options["patches"] = cur_patches
else:
transformer_options["patches"] = patches
transformer_options["patches"] = comfy.patcher_extension.merge_nested_dicts(
transformer_options.get("patches", {}),
patches
)
transformer_options["cond_or_uncond"] = cond_or_uncond[:]
transformer_options["uuids"] = uuids[:]

View File

@ -18,6 +18,7 @@ import comfy.ldm.wan.vae2_2
import comfy.ldm.hunyuan3d.vae
import comfy.ldm.ace.vae.music_dcae_pipeline
import comfy.ldm.hunyuan_video.vae
import comfy.ldm.mmaudio.vae.autoencoder
import comfy.pixel_space_convert
import yaml
import math
@ -275,8 +276,13 @@ class VAE:
if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): #diffusers format
sd = diffusers_convert.convert_vae_state_dict(sd)
self.memory_used_encode = lambda shape, dtype: (1767 * shape[2] * shape[3]) * model_management.dtype_size(dtype) #These are for AutoencoderKL and need tweaking (should be lower)
self.memory_used_decode = lambda shape, dtype: (2178 * shape[2] * shape[3] * 64) * model_management.dtype_size(dtype)
if model_management.is_amd():
VAE_KL_MEM_RATIO = 2.73
else:
VAE_KL_MEM_RATIO = 1.0
self.memory_used_encode = lambda shape, dtype: (1767 * shape[2] * shape[3]) * model_management.dtype_size(dtype) * VAE_KL_MEM_RATIO #These are for AutoencoderKL and need tweaking (should be lower)
self.memory_used_decode = lambda shape, dtype: (2178 * shape[2] * shape[3] * 64) * model_management.dtype_size(dtype) * VAE_KL_MEM_RATIO
self.downscale_ratio = 8
self.upscale_ratio = 8
self.latent_channels = 4
@ -291,6 +297,7 @@ class VAE:
self.downscale_index_formula = None
self.upscale_index_formula = None
self.extra_1d_channel = None
self.crop_input = True
if config is None:
if "decoder.mid.block_1.mix_factor" in sd:
@ -542,6 +549,25 @@ class VAE:
self.latent_channels = 3
self.latent_dim = 2
self.output_channels = 3
elif "vocoder.activation_post.downsample.lowpass.filter" in sd: #MMAudio VAE
sample_rate = 16000
if sample_rate == 16000:
mode = '16k'
else:
mode = '44k'
self.first_stage_model = comfy.ldm.mmaudio.vae.autoencoder.AudioAutoencoder(mode=mode)
self.memory_used_encode = lambda shape, dtype: (30 * shape[2]) * model_management.dtype_size(dtype)
self.memory_used_decode = lambda shape, dtype: (90 * shape[2] * 1411.2) * model_management.dtype_size(dtype)
self.latent_channels = 20
self.output_channels = 2
self.upscale_ratio = 512 * (44100 / sample_rate)
self.downscale_ratio = 512 * (44100 / sample_rate)
self.latent_dim = 1
self.process_output = lambda audio: audio
self.process_input = lambda audio: audio
self.working_dtypes = [torch.float32]
self.crop_input = False
else:
logging.warning("WARNING: No VAE weights detected, VAE not initalized.")
self.first_stage_model = None
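(Back-of-the-envelope check, not part of the commit, of where the `512 * (44100 / sample_rate)` upscale ratio and the 1411.2 factor in `memory_used_decode` come from: the mel hop is 256 samples at 16 kHz (see MelConverter above) and the 1D VAE halves the frame rate once, so each latent frame covers 512 samples at 16 kHz, or 1411.2 samples after resampling to 44.1 kHz.)
```
hop_size = 256                 # MelConverter hop at 16 kHz
vae_time_downsample = 2        # Encoder1D downsamples time once (down_layers=[0])
per_latent_16k = hop_size * vae_time_downsample        # 512
per_latent_44k = per_latent_16k * 44100 / 16000
print(per_latent_16k, per_latent_44k)                  # 512 1411.2
```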
@ -575,6 +601,9 @@ class VAE:
raise RuntimeError("ERROR: VAE is invalid: None\n\nIf the VAE is from a checkpoint loader node your checkpoint does not contain a valid VAE.")
def vae_encode_crop_pixels(self, pixels):
if not self.crop_input:
return pixels
downscale_ratio = self.spacial_compression_encode()
dims = pixels.shape[1:-1]

View File

@ -39,7 +39,11 @@ if hasattr(torch.serialization, "add_safe_globals"): # TODO: this was added in
pass
ModelCheckpoint.__module__ = "pytorch_lightning.callbacks.model_checkpoint"
from numpy.core.multiarray import scalar
def scalar(*args, **kwargs):
from numpy.core.multiarray import scalar as sc
return sc(*args, **kwargs)
scalar.__module__ = "numpy.core.multiarray"
from numpy import dtype
from numpy.dtypes import Float64DType
from _codecs import encode

View File

@ -114,7 +114,9 @@ if TYPE_CHECKING:
ComfyAPISync: Type[comfy_api.latest.generated.ComfyAPISyncStub.ComfyAPISyncStub]
ComfyAPISync = create_sync_class(ComfyAPI_latest)
comfy_io = io # create the new alias for io
# create new aliases for io and ui
IO = io
UI = ui
__all__ = [
"ComfyAPI",
@ -124,6 +126,7 @@ __all__ = [
"Types",
"ComfyExtension",
"io",
"comfy_io",
"IO",
"ui",
"UI",
]

View File

@ -3,6 +3,7 @@ import aiohttp
import io
import logging
import mimetypes
import os
from typing import Optional, Union
from comfy.utils import common_upscale
from comfy_api.input_impl import VideoFromFile
@ -702,3 +703,16 @@ def image_tensor_pair_to_batch(
"center",
).movedim(1, -1)
return torch.cat((image1, image2), dim=0)
def get_size(path_or_object: Union[str, io.BytesIO]) -> int:
if isinstance(path_or_object, str):
return os.path.getsize(path_or_object)
return len(path_or_object.getvalue())
def validate_container_format_is_mp4(video: VideoInput) -> None:
"""Validates video container format is MP4."""
container_format = video.get_container_format()
if container_format not in ["mp4", "mov,mp4,m4a,3gp,3g2,mj2"]:
raise ValueError(f"Only MP4 container format supported. Got: {container_format}")

View File

@ -782,9 +782,11 @@ class PollingOperation(Generic[T, R]):
poll_endpoint: ApiEndpoint[EmptyRequest, R],
completed_statuses: list[str],
failed_statuses: list[str],
*,
status_extractor: Callable[[R], Optional[str]],
progress_extractor: Callable[[R], Optional[float]] | None = None,
result_url_extractor: Callable[[R], Optional[str]] | None = None,
price_extractor: Callable[[R], Optional[float]] | None = None,
request: Optional[T] = None,
api_base: str | None = None,
auth_token: Optional[str] = None,
@ -815,10 +817,12 @@ class PollingOperation(Generic[T, R]):
self.status_extractor = status_extractor or (lambda x: getattr(x, "status", None))
self.progress_extractor = progress_extractor
self.result_url_extractor = result_url_extractor
self.price_extractor = price_extractor
self.node_id = node_id
self.completed_statuses = completed_statuses
self.failed_statuses = failed_statuses
self.final_response: Optional[R] = None
self.extracted_price: Optional[float] = None
async def execute(self, client: Optional[ApiClient] = None) -> R:
owns_client = client is None
@ -840,6 +844,8 @@ class PollingOperation(Generic[T, R]):
def _display_text_on_node(self, text: str):
if not self.node_id:
return
if self.extracted_price is not None:
text = f"Price: ${self.extracted_price}\n{text}"
PromptServer.instance.send_progress_text(text, self.node_id)
def _display_time_progress_on_node(self, time_completed: int | float):
@ -877,9 +883,7 @@ class PollingOperation(Generic[T, R]):
try:
logging.debug("[DEBUG] Polling attempt #%s", poll_count)
request_dict = (
None if self.request is None else self.request.model_dump(exclude_none=True)
)
request_dict = None if self.request is None else self.request.model_dump(exclude_none=True)
if poll_count == 1:
logging.debug(
@ -912,6 +916,11 @@ class PollingOperation(Generic[T, R]):
if new_progress is not None:
progress.update_absolute(new_progress, total=PROGRESS_BAR_MAX)
if self.price_extractor:
price = self.price_extractor(response_obj)
if price is not None:
self.extracted_price = price
if status == TaskStatus.COMPLETED:
message = "Task completed successfully"
if self.result_url_extractor:
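(Call-shape sketch only, not runnable on its own and not taken from the diff: after this change everything from `status_extractor` onward is keyword-only (note the bare `*`), and `price_extractor` is an optional callable whose non-None result is prepended to the node's progress text as `Price: $...`. The import path and the response field names in the lambdas are assumptions.)
```
from comfy_api_nodes.apis.client import PollingOperation  # assumed path

async def poll_with_price(poll_endpoint, node_id):
    op = PollingOperation(
        poll_endpoint=poll_endpoint,
        completed_statuses=["completed"],
        failed_statuses=["failed"],
        status_extractor=lambda r: getattr(r, "status", None),
        progress_extractor=lambda r: getattr(r, "progress", None),
        price_extractor=lambda r: getattr(r, "price", None),  # hypothetical response field
        node_id=node_id,
    )
    return await op.execute()
```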

View File

@ -1,19 +1,22 @@
from __future__ import annotations
from typing import List, Optional
from typing import Optional
from comfy_api_nodes.apis import GeminiGenerationConfig, GeminiContent, GeminiSafetySetting, GeminiSystemInstructionContent, GeminiTool, GeminiVideoMetadata
from pydantic import BaseModel
class GeminiImageConfig(BaseModel):
aspectRatio: Optional[str] = None
class GeminiImageGenerationConfig(GeminiGenerationConfig):
responseModalities: Optional[List[str]] = None
responseModalities: Optional[list[str]] = None
imageConfig: Optional[GeminiImageConfig] = None
class GeminiImageGenerateContentRequest(BaseModel):
contents: List[GeminiContent]
contents: list[GeminiContent]
generationConfig: Optional[GeminiImageGenerationConfig] = None
safetySettings: Optional[List[GeminiSafetySetting]] = None
safetySettings: Optional[list[GeminiSafetySetting]] = None
systemInstruction: Optional[GeminiSystemInstructionContent] = None
tools: Optional[List[GeminiTool]] = None
tools: Optional[list[GeminiTool]] = None
videoMetadata: Optional[GeminiVideoMetadata] = None
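For illustration, a generation config using the new `imageConfig` field could be built as below; the "16:9" value is only an example, and only the field names come from the models above:

```python
config = GeminiImageGenerationConfig(
    responseModalities=["TEXT", "IMAGE"],
    imageConfig=GeminiImageConfig(aspectRatio="16:9"),
)
```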

View File

@ -3,7 +3,7 @@ import io
from inspect import cleandoc
from typing import Union, Optional
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api_nodes.apis.bfl_api import (
BFLStatus,
BFLFluxExpandImageRequest,
@ -131,7 +131,7 @@ def convert_image_to_base64(image: torch.Tensor):
return base64.b64encode(img_byte_arr.getvalue()).decode()
class FluxProUltraImageNode(comfy_io.ComfyNode):
class FluxProUltraImageNode(IO.ComfyNode):
"""
Generates images using Flux Pro 1.1 Ultra via api based on prompt and resolution.
"""
@ -142,25 +142,25 @@ class FluxProUltraImageNode(comfy_io.ComfyNode):
MAXIMUM_RATIO_STR = "4:1"
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="FluxProUltraImageNode",
display_name="Flux 1.1 [pro] Ultra Image",
category="api node/image/BFL",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_upsampling",
default=False,
tooltip="Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -168,21 +168,21 @@ class FluxProUltraImageNode(comfy_io.ComfyNode):
control_after_generate=True,
tooltip="The random seed used for creating the noise.",
),
comfy_io.String.Input(
IO.String.Input(
"aspect_ratio",
default="16:9",
tooltip="Aspect ratio of image; must be between 1:4 and 4:1.",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"raw",
default=False,
tooltip="When True, generate less processed, more natural-looking images.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image_prompt",
optional=True,
),
comfy_io.Float.Input(
IO.Float.Input(
"image_prompt_strength",
default=0.1,
min=0.0,
@ -192,11 +192,11 @@ class FluxProUltraImageNode(comfy_io.ComfyNode):
optional=True,
),
],
outputs=[comfy_io.Image.Output()],
outputs=[IO.Image.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -225,7 +225,7 @@ class FluxProUltraImageNode(comfy_io.ComfyNode):
seed=0,
image_prompt=None,
image_prompt_strength=0.1,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
if image_prompt is None:
validate_string(prompt, strip_whitespace=False)
operation = SynchronousOperation(
@ -262,10 +262,10 @@ class FluxProUltraImageNode(comfy_io.ComfyNode):
},
)
output_image = await handle_bfl_synchronous_operation(operation, node_id=cls.hidden.unique_id)
return comfy_io.NodeOutput(output_image)
return IO.NodeOutput(output_image)
class FluxKontextProImageNode(comfy_io.ComfyNode):
class FluxKontextProImageNode(IO.ComfyNode):
"""
Edits images using Flux.1 Kontext [pro] via api based on prompt and aspect ratio.
"""
@ -276,25 +276,25 @@ class FluxKontextProImageNode(comfy_io.ComfyNode):
MAXIMUM_RATIO_STR = "4:1"
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id=cls.NODE_ID,
display_name=cls.DISPLAY_NAME,
category="api node/image/BFL",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation - specify what and how to edit.",
),
comfy_io.String.Input(
IO.String.Input(
"aspect_ratio",
default="16:9",
tooltip="Aspect ratio of image; must be between 1:4 and 4:1.",
),
comfy_io.Float.Input(
IO.Float.Input(
"guidance",
default=3.0,
min=0.1,
@ -302,14 +302,14 @@ class FluxKontextProImageNode(comfy_io.ComfyNode):
step=0.1,
tooltip="Guidance strength for the image generation process",
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=50,
min=1,
max=150,
tooltip="Number of steps for the image generation process",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=1234,
min=0,
@ -317,21 +317,21 @@ class FluxKontextProImageNode(comfy_io.ComfyNode):
control_after_generate=True,
tooltip="The random seed used for creating the noise.",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_upsampling",
default=False,
tooltip="Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).",
),
comfy_io.Image.Input(
IO.Image.Input(
"input_image",
optional=True,
),
],
outputs=[comfy_io.Image.Output()],
outputs=[IO.Image.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -350,7 +350,7 @@ class FluxKontextProImageNode(comfy_io.ComfyNode):
input_image: Optional[torch.Tensor]=None,
seed=0,
prompt_upsampling=False,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
aspect_ratio = validate_aspect_ratio(
aspect_ratio,
minimum_ratio=cls.MINIMUM_RATIO,
@ -386,7 +386,7 @@ class FluxKontextProImageNode(comfy_io.ComfyNode):
},
)
output_image = await handle_bfl_synchronous_operation(operation, node_id=cls.hidden.unique_id)
return comfy_io.NodeOutput(output_image)
return IO.NodeOutput(output_image)
class FluxKontextMaxImageNode(FluxKontextProImageNode):
@ -400,45 +400,45 @@ class FluxKontextMaxImageNode(FluxKontextProImageNode):
DISPLAY_NAME = "Flux.1 Kontext [max] Image"
class FluxProImageNode(comfy_io.ComfyNode):
class FluxProImageNode(IO.ComfyNode):
"""
Generates images synchronously based on prompt and resolution.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="FluxProImageNode",
display_name="Flux 1.1 [pro] Image",
category="api node/image/BFL",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_upsampling",
default=False,
tooltip="Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).",
),
comfy_io.Int.Input(
IO.Int.Input(
"width",
default=1024,
min=256,
max=1440,
step=32,
),
comfy_io.Int.Input(
IO.Int.Input(
"height",
default=768,
min=256,
max=1440,
step=32,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -446,7 +446,7 @@ class FluxProImageNode(comfy_io.ComfyNode):
control_after_generate=True,
tooltip="The random seed used for creating the noise.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image_prompt",
optional=True,
),
@ -461,11 +461,11 @@ class FluxProImageNode(comfy_io.ComfyNode):
# },
# ),
],
outputs=[comfy_io.Image.Output()],
outputs=[IO.Image.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -480,7 +480,7 @@ class FluxProImageNode(comfy_io.ComfyNode):
seed=0,
image_prompt=None,
# image_prompt_strength=0.1,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
image_prompt = (
image_prompt
if image_prompt is None
@ -508,77 +508,77 @@ class FluxProImageNode(comfy_io.ComfyNode):
},
)
output_image = await handle_bfl_synchronous_operation(operation, node_id=cls.hidden.unique_id)
return comfy_io.NodeOutput(output_image)
return IO.NodeOutput(output_image)
class FluxProExpandNode(comfy_io.ComfyNode):
class FluxProExpandNode(IO.ComfyNode):
"""
Outpaints image based on prompt.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="FluxProExpandNode",
display_name="Flux.1 Expand Image",
category="api node/image/BFL",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("image"),
comfy_io.String.Input(
IO.Image.Input("image"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_upsampling",
default=False,
tooltip="Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).",
),
comfy_io.Int.Input(
IO.Int.Input(
"top",
default=0,
min=0,
max=2048,
tooltip="Number of pixels to expand at the top of the image",
),
comfy_io.Int.Input(
IO.Int.Input(
"bottom",
default=0,
min=0,
max=2048,
tooltip="Number of pixels to expand at the bottom of the image",
),
comfy_io.Int.Input(
IO.Int.Input(
"left",
default=0,
min=0,
max=2048,
tooltip="Number of pixels to expand at the left of the image",
),
comfy_io.Int.Input(
IO.Int.Input(
"right",
default=0,
min=0,
max=2048,
tooltip="Number of pixels to expand at the right of the image",
),
comfy_io.Float.Input(
IO.Float.Input(
"guidance",
default=60,
min=1.5,
max=100,
tooltip="Guidance strength for the image generation process",
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=50,
min=15,
max=50,
tooltip="Number of steps for the image generation process",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -587,11 +587,11 @@ class FluxProExpandNode(comfy_io.ComfyNode):
tooltip="The random seed used for creating the noise.",
),
],
outputs=[comfy_io.Image.Output()],
outputs=[IO.Image.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -609,7 +609,7 @@ class FluxProExpandNode(comfy_io.ComfyNode):
steps: int,
guidance: float,
seed=0,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
image = convert_image_to_base64(image)
operation = SynchronousOperation(
@ -637,51 +637,51 @@ class FluxProExpandNode(comfy_io.ComfyNode):
},
)
output_image = await handle_bfl_synchronous_operation(operation, node_id=cls.hidden.unique_id)
return comfy_io.NodeOutput(output_image)
return IO.NodeOutput(output_image)
class FluxProFillNode(comfy_io.ComfyNode):
class FluxProFillNode(IO.ComfyNode):
"""
Inpaints image based on mask and prompt.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="FluxProFillNode",
display_name="Flux.1 Fill Image",
category="api node/image/BFL",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("image"),
comfy_io.Mask.Input("mask"),
comfy_io.String.Input(
IO.Image.Input("image"),
IO.Mask.Input("mask"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_upsampling",
default=False,
tooltip="Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).",
),
comfy_io.Float.Input(
IO.Float.Input(
"guidance",
default=60,
min=1.5,
max=100,
tooltip="Guidance strength for the image generation process",
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=50,
min=15,
max=50,
tooltip="Number of steps for the image generation process",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -690,11 +690,11 @@ class FluxProFillNode(comfy_io.ComfyNode):
tooltip="The random seed used for creating the noise.",
),
],
outputs=[comfy_io.Image.Output()],
outputs=[IO.Image.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -709,7 +709,7 @@ class FluxProFillNode(comfy_io.ComfyNode):
steps: int,
guidance: float,
seed=0,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
# prepare mask
mask = resize_mask_to_image(mask, image)
mask = convert_image_to_base64(convert_mask_to_image(mask))
@ -738,35 +738,35 @@ class FluxProFillNode(comfy_io.ComfyNode):
},
)
output_image = await handle_bfl_synchronous_operation(operation, node_id=cls.hidden.unique_id)
return comfy_io.NodeOutput(output_image)
return IO.NodeOutput(output_image)
class FluxProCannyNode(comfy_io.ComfyNode):
class FluxProCannyNode(IO.ComfyNode):
"""
Generate image using a control image (canny).
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="FluxProCannyNode",
display_name="Flux.1 Canny Control Image",
category="api node/image/BFL",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("control_image"),
comfy_io.String.Input(
IO.Image.Input("control_image"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_upsampling",
default=False,
tooltip="Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).",
),
comfy_io.Float.Input(
IO.Float.Input(
"canny_low_threshold",
default=0.1,
min=0.01,
@ -774,7 +774,7 @@ class FluxProCannyNode(comfy_io.ComfyNode):
step=0.01,
tooltip="Low threshold for Canny edge detection; ignored if skip_processing is True",
),
comfy_io.Float.Input(
IO.Float.Input(
"canny_high_threshold",
default=0.4,
min=0.01,
@ -782,26 +782,26 @@ class FluxProCannyNode(comfy_io.ComfyNode):
step=0.01,
tooltip="High threshold for Canny edge detection; ignored if skip_processing is True",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"skip_preprocessing",
default=False,
tooltip="Whether to skip preprocessing; set to True if control_image already is canny-fied, False if it is a raw image.",
),
comfy_io.Float.Input(
IO.Float.Input(
"guidance",
default=30,
min=1,
max=100,
tooltip="Guidance strength for the image generation process",
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=50,
min=15,
max=50,
tooltip="Number of steps for the image generation process",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -810,11 +810,11 @@ class FluxProCannyNode(comfy_io.ComfyNode):
tooltip="The random seed used for creating the noise.",
),
],
outputs=[comfy_io.Image.Output()],
outputs=[IO.Image.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -831,7 +831,7 @@ class FluxProCannyNode(comfy_io.ComfyNode):
steps: int,
guidance: float,
seed=0,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
control_image = convert_image_to_base64(control_image[:, :, :, :3])
preprocessed_image = None
@ -872,54 +872,54 @@ class FluxProCannyNode(comfy_io.ComfyNode):
},
)
output_image = await handle_bfl_synchronous_operation(operation, node_id=cls.hidden.unique_id)
return comfy_io.NodeOutput(output_image)
return IO.NodeOutput(output_image)
class FluxProDepthNode(comfy_io.ComfyNode):
class FluxProDepthNode(IO.ComfyNode):
"""
Generate image using a control image (depth).
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="FluxProDepthNode",
display_name="Flux.1 Depth Control Image",
category="api node/image/BFL",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("control_image"),
comfy_io.String.Input(
IO.Image.Input("control_image"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_upsampling",
default=False,
tooltip="Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"skip_preprocessing",
default=False,
tooltip="Whether to skip preprocessing; set to True if control_image already is depth-ified, False if it is a raw image.",
),
comfy_io.Float.Input(
IO.Float.Input(
"guidance",
default=15,
min=1,
max=100,
tooltip="Guidance strength for the image generation process",
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=50,
min=15,
max=50,
tooltip="Number of steps for the image generation process",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -928,11 +928,11 @@ class FluxProDepthNode(comfy_io.ComfyNode):
tooltip="The random seed used for creating the noise.",
),
],
outputs=[comfy_io.Image.Output()],
outputs=[IO.Image.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -947,7 +947,7 @@ class FluxProDepthNode(comfy_io.ComfyNode):
steps: int,
guidance: float,
seed=0,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
control_image = convert_image_to_base64(control_image[:,:,:,:3])
preprocessed_image = None
@ -977,12 +977,12 @@ class FluxProDepthNode(comfy_io.ComfyNode):
},
)
output_image = await handle_bfl_synchronous_operation(operation, node_id=cls.hidden.unique_id)
return comfy_io.NodeOutput(output_image)
return IO.NodeOutput(output_image)
class BFLExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
FluxProUltraImageNode,
# FluxProImageNode,

View File

@ -7,7 +7,7 @@ from typing_extensions import override
import torch
from pydantic import BaseModel, Field
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api_nodes.util.validation_utils import (
validate_image_aspect_ratio_range,
get_number_of_images,
@ -237,33 +237,33 @@ async def poll_until_finished(
).execute()
class ByteDanceImageNode(comfy_io.ComfyNode):
class ByteDanceImageNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceImageNode",
display_name="ByteDance Image",
category="api node/image/ByteDance",
description="Generate images using ByteDance models via api based on prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=Text2ImageModelName,
default=Text2ImageModelName.seedream_3,
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="The text prompt used to generate the image",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"size_preset",
options=[label for label, _, _ in RECOMMENDED_PRESETS],
tooltip="Pick a recommended size. Select Custom to use the width and height below",
),
comfy_io.Int.Input(
IO.Int.Input(
"width",
default=1024,
min=512,
@ -271,7 +271,7 @@ class ByteDanceImageNode(comfy_io.ComfyNode):
step=64,
tooltip="Custom width for image. Value is working only if `size_preset` is set to `Custom`",
),
comfy_io.Int.Input(
IO.Int.Input(
"height",
default=1024,
min=512,
@ -279,28 +279,28 @@ class ByteDanceImageNode(comfy_io.ComfyNode):
step=64,
tooltip="Custom height for image. Value is working only if `size_preset` is set to `Custom`",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation",
optional=True,
),
comfy_io.Float.Input(
IO.Float.Input(
"guidance_scale",
default=2.5,
min=1.0,
max=10.0,
step=0.01,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Higher value makes the image follow the prompt more closely",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the image",
@ -308,12 +308,12 @@ class ByteDanceImageNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -329,7 +329,7 @@ class ByteDanceImageNode(comfy_io.ComfyNode):
seed: int,
guidance_scale: float,
watermark: bool,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
w = h = None
for label, tw, th in RECOMMENDED_PRESETS:
@ -367,57 +367,57 @@ class ByteDanceImageNode(comfy_io.ComfyNode):
request=payload,
auth_kwargs=auth_kwargs,
).execute()
return comfy_io.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
return IO.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
class ByteDanceImageEditNode(comfy_io.ComfyNode):
class ByteDanceImageEditNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceImageEditNode",
display_name="ByteDance Image Edit",
category="api node/image/ByteDance",
description="Edit images using ByteDance models via api based on prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=Image2ImageModelName,
default=Image2ImageModelName.seededit_3,
tooltip="Model name",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="The base image to edit",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Instruction to edit image",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation",
optional=True,
),
comfy_io.Float.Input(
IO.Float.Input(
"guidance_scale",
default=5.5,
min=1.0,
max=10.0,
step=0.01,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Higher value makes the image follow the prompt more closely",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the image",
@ -425,12 +425,12 @@ class ByteDanceImageEditNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -444,7 +444,7 @@ class ByteDanceImageEditNode(comfy_io.ComfyNode):
seed: int,
guidance_scale: float,
watermark: bool,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
if get_number_of_images(image) != 1:
raise ValueError("Exactly one input image is required.")
@ -477,42 +477,42 @@ class ByteDanceImageEditNode(comfy_io.ComfyNode):
request=payload,
auth_kwargs=auth_kwargs,
).execute()
return comfy_io.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
return IO.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
class ByteDanceSeedreamNode(comfy_io.ComfyNode):
class ByteDanceSeedreamNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceSeedreamNode",
display_name="ByteDance Seedream 4",
category="api node/image/ByteDance",
description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["seedream-4-0-250828"],
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text prompt for creating or editing an image.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="Input image(s) for image-to-image generation. "
"List of 1-10 images for single or multi-reference generation.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"size_preset",
options=[label for label, _, _ in RECOMMENDED_PRESETS_SEEDREAM_4],
tooltip="Pick a recommended size. Select Custom to use the width and height below.",
),
comfy_io.Int.Input(
IO.Int.Input(
"width",
default=2048,
min=1024,
@ -521,7 +521,7 @@ class ByteDanceSeedreamNode(comfy_io.ComfyNode):
tooltip="Custom width for image. Value is working only if `size_preset` is set to `Custom`",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"height",
default=2048,
min=1024,
@ -530,7 +530,7 @@ class ByteDanceSeedreamNode(comfy_io.ComfyNode):
tooltip="Custom height for image. Value is working only if `size_preset` is set to `Custom`",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"sequential_image_generation",
options=["disabled", "auto"],
tooltip="Group image generation mode. "
@ -539,35 +539,35 @@ class ByteDanceSeedreamNode(comfy_io.ComfyNode):
"(e.g., story scenes, character variations).",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"max_images",
default=1,
min=1,
max=15,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Maximum number of images to generate when sequential_image_generation='auto'. "
"Total images (input + generated) cannot exceed 15.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the image.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"fail_on_partial",
default=True,
tooltip="If enabled, abort execution if any requested images are missing or return an error.",
@ -575,12 +575,12 @@ class ByteDanceSeedreamNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -599,7 +599,7 @@ class ByteDanceSeedreamNode(comfy_io.ComfyNode):
seed: int = 0,
watermark: bool = True,
fail_on_partial: bool = True,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
w = h = None
for label, tw, th in RECOMMENDED_PRESETS_SEEDREAM_4:
@ -657,72 +657,72 @@ class ByteDanceSeedreamNode(comfy_io.ComfyNode):
).execute()
if len(response.data) == 1:
return comfy_io.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
return IO.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
urls = [str(d["url"]) for d in response.data if isinstance(d, dict) and "url" in d]
if fail_on_partial and len(urls) < len(response.data):
raise RuntimeError(f"Only {len(urls)} of {len(response.data)} images were generated before error.")
return comfy_io.NodeOutput(torch.cat([await download_url_to_image_tensor(i) for i in urls]))
return IO.NodeOutput(torch.cat([await download_url_to_image_tensor(i) for i in urls]))
class ByteDanceTextToVideoNode(comfy_io.ComfyNode):
class ByteDanceTextToVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceTextToVideoNode",
display_name="ByteDance Text to Video",
category="api node/video/ByteDance",
description="Generate video using ByteDance models via api based on prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=Text2VideoModelName,
default=Text2VideoModelName.seedance_1_pro,
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="The text prompt used to generate the video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=["480p", "720p", "1080p"],
tooltip="The resolution of the output video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=["16:9", "4:3", "1:1", "3:4", "9:16", "21:9"],
tooltip="The aspect ratio of the output video.",
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=3,
max=12,
step=1,
tooltip="The duration of the output video in seconds.",
display_mode=comfy_io.NumberDisplay.slider,
display_mode=IO.NumberDisplay.slider,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"camera_fixed",
default=False,
tooltip="Specifies whether to fix the camera. The platform appends an instruction "
"to fix the camera to your prompt, but does not guarantee the actual effect.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the video.",
@ -730,12 +730,12 @@ class ByteDanceTextToVideoNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -751,7 +751,7 @@ class ByteDanceTextToVideoNode(comfy_io.ComfyNode):
seed: int,
camera_fixed: bool,
watermark: bool,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"])
@ -781,69 +781,69 @@ class ByteDanceTextToVideoNode(comfy_io.ComfyNode):
)
class ByteDanceImageToVideoNode(comfy_io.ComfyNode):
class ByteDanceImageToVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceImageToVideoNode",
display_name="ByteDance Image to Video",
category="api node/video/ByteDance",
description="Generate video using ByteDance models via api based on image and prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=Image2VideoModelName,
default=Image2VideoModelName.seedance_1_pro,
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="The text prompt used to generate the video.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="First frame to be used for the video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=["480p", "720p", "1080p"],
tooltip="The resolution of the output video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=["adaptive", "16:9", "4:3", "1:1", "3:4", "9:16", "21:9"],
tooltip="The aspect ratio of the output video.",
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=3,
max=12,
step=1,
tooltip="The duration of the output video in seconds.",
display_mode=comfy_io.NumberDisplay.slider,
display_mode=IO.NumberDisplay.slider,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"camera_fixed",
default=False,
tooltip="Specifies whether to fix the camera. The platform appends an instruction "
"to fix the camera to your prompt, but does not guarantee the actual effect.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the video.",
@ -851,12 +851,12 @@ class ByteDanceImageToVideoNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -873,7 +873,7 @@ class ByteDanceImageToVideoNode(comfy_io.ComfyNode):
seed: int,
camera_fixed: bool,
watermark: bool,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"])
validate_image_dimensions(image, min_width=300, min_height=300, max_width=6000, max_height=6000)
@ -908,73 +908,73 @@ class ByteDanceImageToVideoNode(comfy_io.ComfyNode):
)
class ByteDanceFirstLastFrameNode(comfy_io.ComfyNode):
class ByteDanceFirstLastFrameNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceFirstLastFrameNode",
display_name="ByteDance First-Last-Frame to Video",
category="api node/video/ByteDance",
description="Generate video using prompt and first and last frames.",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[model.value for model in Image2VideoModelName],
default=Image2VideoModelName.seedance_1_lite.value,
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="The text prompt used to generate the video.",
),
comfy_io.Image.Input(
IO.Image.Input(
"first_frame",
tooltip="First frame to be used for the video.",
),
comfy_io.Image.Input(
IO.Image.Input(
"last_frame",
tooltip="Last frame to be used for the video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=["480p", "720p", "1080p"],
tooltip="The resolution of the output video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=["adaptive", "16:9", "4:3", "1:1", "3:4", "9:16", "21:9"],
tooltip="The aspect ratio of the output video.",
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=3,
max=12,
step=1,
tooltip="The duration of the output video in seconds.",
display_mode=comfy_io.NumberDisplay.slider,
display_mode=IO.NumberDisplay.slider,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"camera_fixed",
default=False,
tooltip="Specifies whether to fix the camera. The platform appends an instruction "
"to fix the camera to your prompt, but does not guarantee the actual effect.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the video.",
@ -982,12 +982,12 @@ class ByteDanceFirstLastFrameNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -1005,7 +1005,7 @@ class ByteDanceFirstLastFrameNode(comfy_io.ComfyNode):
seed: int,
camera_fixed: bool,
watermark: bool,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"])
for i in (first_frame, last_frame):
@ -1050,62 +1050,62 @@ class ByteDanceFirstLastFrameNode(comfy_io.ComfyNode):
)
class ByteDanceImageReferenceNode(comfy_io.ComfyNode):
class ByteDanceImageReferenceNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceImageReferenceNode",
display_name="ByteDance Reference Images to Video",
category="api node/video/ByteDance",
description="Generate video using prompt and reference images.",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[Image2VideoModelName.seedance_1_lite.value],
default=Image2VideoModelName.seedance_1_lite.value,
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="The text prompt used to generate the video.",
),
comfy_io.Image.Input(
IO.Image.Input(
"images",
tooltip="One to four images.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=["480p", "720p"],
tooltip="The resolution of the output video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=["adaptive", "16:9", "4:3", "1:1", "3:4", "9:16", "21:9"],
tooltip="The aspect ratio of the output video.",
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=3,
max=12,
step=1,
tooltip="The duration of the output video in seconds.",
display_mode=comfy_io.NumberDisplay.slider,
display_mode=IO.NumberDisplay.slider,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the video.",
@ -1113,12 +1113,12 @@ class ByteDanceImageReferenceNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -1134,7 +1134,7 @@ class ByteDanceImageReferenceNode(comfy_io.ComfyNode):
duration: int,
seed: int,
watermark: bool,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "watermark"])
for image in images:
@ -1180,7 +1180,7 @@ async def process_video_task(
auth_kwargs: dict,
node_id: str,
estimated_duration: Optional[int],
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
initial_response = await SynchronousOperation(
endpoint=ApiEndpoint(
path=BYTEPLUS_TASK_ENDPOINT,
@ -1197,7 +1197,7 @@ async def process_video_task(
estimated_duration=estimated_duration,
node_id=node_id,
)
return comfy_io.NodeOutput(await download_url_to_video_output(get_video_url_from_task_status(response)))
return IO.NodeOutput(await download_url_to_video_output(get_video_url_from_task_status(response)))
def raise_if_text_params(prompt: str, text_params: list[str]) -> None:
@ -1210,7 +1210,7 @@ def raise_if_text_params(prompt: str, text_params: list[str]) -> None:
class ByteDanceExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
ByteDanceImageNode,
ByteDanceImageEditNode,

View File

@ -26,7 +26,7 @@ from comfy_api_nodes.apis import (
GeminiPart,
GeminiMimeType,
)
from comfy_api_nodes.apis.gemini_api import GeminiImageGenerationConfig, GeminiImageGenerateContentRequest
from comfy_api_nodes.apis.gemini_api import GeminiImageGenerationConfig, GeminiImageGenerateContentRequest, GeminiImageConfig
from comfy_api_nodes.apis.client import (
ApiEndpoint,
HttpMethod,
@ -63,6 +63,7 @@ class GeminiImageModel(str, Enum):
"""
gemini_2_5_flash_image_preview = "gemini-2.5-flash-image-preview"
gemini_2_5_flash_image = "gemini-2.5-flash-image"
def get_gemini_endpoint(
@ -538,7 +539,7 @@ class GeminiImage(ComfyNodeABC):
{
"tooltip": "The Gemini model to use for generating responses.",
"options": [model.value for model in GeminiImageModel],
"default": GeminiImageModel.gemini_2_5_flash_image_preview.value,
"default": GeminiImageModel.gemini_2_5_flash_image.value,
},
),
"seed": (
@ -579,6 +580,14 @@ class GeminiImage(ComfyNodeABC):
# "tooltip": "How many images to generate",
# },
# ),
"aspect_ratio": (
IO.COMBO,
{
"tooltip": "Defaults to matching the output image size to that of your input image, or otherwise generates 1:1 squares.",
"options": ["auto", "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"],
"default": "auto",
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
@ -600,15 +609,17 @@ class GeminiImage(ComfyNodeABC):
images: Optional[IO.IMAGE] = None,
files: Optional[list[GeminiPart]] = None,
n=1,
aspect_ratio: str = "auto",
unique_id: Optional[str] = None,
**kwargs,
):
# Validate inputs
validate_string(prompt, strip_whitespace=True, min_length=1)
# Create parts list with text prompt as the first part
parts: list[GeminiPart] = [create_text_part(prompt)]
# Add other modal parts
if not aspect_ratio:
aspect_ratio = "auto" # for backward compatability with old workflows; to-do remove this in December
image_config = GeminiImageConfig(aspectRatio=aspect_ratio)
if images is not None:
image_parts = create_image_parts(images)
parts.extend(image_parts)
@ -625,7 +636,8 @@ class GeminiImage(ComfyNodeABC):
),
],
generationConfig=GeminiImageGenerationConfig(
responseModalities=["TEXT","IMAGE"]
responseModalities=["TEXT","IMAGE"],
imageConfig=None if aspect_ratio == "auto" else image_config,
)
),
auth_kwargs=kwargs,

View File

@ -1,6 +1,6 @@
from io import BytesIO
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from PIL import Image
import numpy as np
import torch
@ -246,76 +246,76 @@ def display_image_urls_on_node(image_urls, node_id):
PromptServer.instance.send_progress_text(urls_text, node_id)
class IdeogramV1(comfy_io.ComfyNode):
class IdeogramV1(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="IdeogramV1",
display_name="Ideogram V1",
category="api node/image/Ideogram",
description="Generates images using the Ideogram V1 model.",
is_api_node=True,
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"turbo",
default=False,
tooltip="Whether to use turbo mode (faster generation, potentially lower quality)",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=list(V1_V2_RATIO_MAP.keys()),
default="1:1",
tooltip="The aspect ratio for image generation.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"magic_prompt_option",
options=["AUTO", "ON", "OFF"],
default="AUTO",
tooltip="Determine if MagicPrompt should be used in generation",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
control_after_generate=True,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Description of what to exclude from the image",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"num_images",
default=1,
min=1,
max=8,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
)
@ -372,39 +372,39 @@ class IdeogramV1(comfy_io.ComfyNode):
raise Exception("No image URLs were generated in the response")
display_image_urls_on_node(image_urls, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_and_process_images(image_urls))
return IO.NodeOutput(await download_and_process_images(image_urls))
class IdeogramV2(comfy_io.ComfyNode):
class IdeogramV2(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="IdeogramV2",
display_name="Ideogram V2",
category="api node/image/Ideogram",
description="Generates images using the Ideogram V2 model.",
is_api_node=True,
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"turbo",
default=False,
tooltip="Whether to use turbo mode (faster generation, potentially lower quality)",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=list(V1_V2_RATIO_MAP.keys()),
default="1:1",
tooltip="The aspect ratio for image generation. Ignored if resolution is not set to AUTO.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=list(V1_V1_RES_MAP.keys()),
default="Auto",
@ -412,44 +412,44 @@ class IdeogramV2(comfy_io.ComfyNode):
"If not set to AUTO, this overrides the aspect_ratio setting.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"magic_prompt_option",
options=["AUTO", "ON", "OFF"],
default="AUTO",
tooltip="Determine if MagicPrompt should be used in generation",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
control_after_generate=True,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"style_type",
options=["AUTO", "GENERAL", "REALISTIC", "DESIGN", "RENDER_3D", "ANIME"],
default="NONE",
tooltip="Style type for generation (V2 only)",
optional=True,
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Description of what to exclude from the image",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"num_images",
default=1,
min=1,
max=8,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
#"color_palette": (
@ -462,12 +462,12 @@ class IdeogramV2(comfy_io.ComfyNode):
#),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
)
@ -541,14 +541,14 @@ class IdeogramV2(comfy_io.ComfyNode):
raise Exception("No image URLs were generated in the response")
display_image_urls_on_node(image_urls, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_and_process_images(image_urls))
return IO.NodeOutput(await download_and_process_images(image_urls))
class IdeogramV3(comfy_io.ComfyNode):
class IdeogramV3(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="IdeogramV3",
display_name="Ideogram V3",
category="api node/image/Ideogram",
@ -556,30 +556,30 @@ class IdeogramV3(comfy_io.ComfyNode):
"Supports both regular image generation from text prompts and image editing with mask.",
is_api_node=True,
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation or editing",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="Optional reference image for image editing.",
optional=True,
),
comfy_io.Mask.Input(
IO.Mask.Input(
"mask",
tooltip="Optional mask for inpainting (white areas will be replaced)",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=list(V3_RATIO_MAP.keys()),
default="1:1",
tooltip="The aspect ratio for image generation. Ignored if resolution is not set to Auto.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=V3_RESOLUTIONS,
default="Auto",
@ -587,57 +587,57 @@ class IdeogramV3(comfy_io.ComfyNode):
"If not set to Auto, this overrides the aspect_ratio setting.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"magic_prompt_option",
options=["AUTO", "ON", "OFF"],
default="AUTO",
tooltip="Determine if MagicPrompt should be used in generation",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
control_after_generate=True,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"num_images",
default=1,
min=1,
max=8,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"rendering_speed",
options=["DEFAULT", "TURBO", "QUALITY"],
default="DEFAULT",
tooltip="Controls the trade-off between generation speed and quality",
optional=True,
),
comfy_io.Image.Input(
IO.Image.Input(
"character_image",
tooltip="Image to use as character reference.",
optional=True,
),
comfy_io.Mask.Input(
IO.Mask.Input(
"character_mask",
tooltip="Optional mask for character reference image.",
optional=True,
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
)
@ -826,12 +826,12 @@ class IdeogramV3(comfy_io.ComfyNode):
raise Exception("No image URLs were generated in the response")
display_image_urls_on_node(image_urls, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_and_process_images(image_urls))
return IO.NodeOutput(await download_and_process_images(image_urls))
class IdeogramExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
IdeogramV1,
IdeogramV2,

File diff suppressed because it is too large

View File

@ -2,7 +2,7 @@ from __future__ import annotations
from inspect import cleandoc
from typing import Optional
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api.input_impl.video_types import VideoFromFile
from comfy_api_nodes.apis.luma_api import (
LumaImageModel,
@ -52,24 +52,24 @@ def image_result_url_extractor(response: LumaGeneration):
def video_result_url_extractor(response: LumaGeneration):
return response.assets.video if hasattr(response, "assets") and hasattr(response.assets, "video") else None
class LumaReferenceNode(comfy_io.ComfyNode):
class LumaReferenceNode(IO.ComfyNode):
"""
Holds an image and weight for use with Luma Generate Image node.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="LumaReferenceNode",
display_name="Luma Reference",
category="api node/image/Luma",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="Image to use as reference.",
),
comfy_io.Float.Input(
IO.Float.Input(
"weight",
default=1.0,
min=0.0,
@ -77,71 +77,71 @@ class LumaReferenceNode(comfy_io.ComfyNode):
step=0.01,
tooltip="Weight of image reference.",
),
comfy_io.Custom(LumaIO.LUMA_REF).Input(
IO.Custom(LumaIO.LUMA_REF).Input(
"luma_ref",
optional=True,
),
],
outputs=[comfy_io.Custom(LumaIO.LUMA_REF).Output(display_name="luma_ref")],
outputs=[IO.Custom(LumaIO.LUMA_REF).Output(display_name="luma_ref")],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
)
@classmethod
def execute(
cls, image: torch.Tensor, weight: float, luma_ref: LumaReferenceChain = None
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
if luma_ref is not None:
luma_ref = luma_ref.clone()
else:
luma_ref = LumaReferenceChain()
luma_ref.add(LumaReference(image=image, weight=round(weight, 2)))
return comfy_io.NodeOutput(luma_ref)
return IO.NodeOutput(luma_ref)
class LumaConceptsNode(comfy_io.ComfyNode):
class LumaConceptsNode(IO.ComfyNode):
"""
Holds one or more Camera Concepts for use with Luma Text to Video and Luma Image to Video nodes.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="LumaConceptsNode",
display_name="Luma Concepts",
category="api node/video/Luma",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"concept1",
options=get_luma_concepts(include_none=True),
),
comfy_io.Combo.Input(
IO.Combo.Input(
"concept2",
options=get_luma_concepts(include_none=True),
),
comfy_io.Combo.Input(
IO.Combo.Input(
"concept3",
options=get_luma_concepts(include_none=True),
),
comfy_io.Combo.Input(
IO.Combo.Input(
"concept4",
options=get_luma_concepts(include_none=True),
),
comfy_io.Custom(LumaIO.LUMA_CONCEPTS).Input(
IO.Custom(LumaIO.LUMA_CONCEPTS).Input(
"luma_concepts",
tooltip="Optional Camera Concepts to add to the ones chosen here.",
optional=True,
),
],
outputs=[comfy_io.Custom(LumaIO.LUMA_CONCEPTS).Output(display_name="luma_concepts")],
outputs=[IO.Custom(LumaIO.LUMA_CONCEPTS).Output(display_name="luma_concepts")],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
)
@ -153,42 +153,42 @@ class LumaConceptsNode(comfy_io.ComfyNode):
concept3: str,
concept4: str,
luma_concepts: LumaConceptChain = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
chain = LumaConceptChain(str_list=[concept1, concept2, concept3, concept4])
if luma_concepts is not None:
chain = luma_concepts.clone_and_merge(chain)
return comfy_io.NodeOutput(chain)
return IO.NodeOutput(chain)
class LumaImageGenerationNode(comfy_io.ComfyNode):
class LumaImageGenerationNode(IO.ComfyNode):
"""
Generates images synchronously based on prompt and aspect ratio.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="LumaImageNode",
display_name="Luma Text to Image",
category="api node/image/Luma",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=LumaImageModel,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=LumaAspectRatio,
default=LumaAspectRatio.ratio_16_9,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -196,7 +196,7 @@ class LumaImageGenerationNode(comfy_io.ComfyNode):
control_after_generate=True,
tooltip="Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
),
comfy_io.Float.Input(
IO.Float.Input(
"style_image_weight",
default=1.0,
min=0.0,
@ -204,27 +204,27 @@ class LumaImageGenerationNode(comfy_io.ComfyNode):
step=0.01,
tooltip="Weight of style image. Ignored if no style_image provided.",
),
comfy_io.Custom(LumaIO.LUMA_REF).Input(
IO.Custom(LumaIO.LUMA_REF).Input(
"image_luma_ref",
tooltip="Luma Reference node connection to influence generation with input images; up to 4 images can be considered.",
optional=True,
),
comfy_io.Image.Input(
IO.Image.Input(
"style_image",
tooltip="Style reference image; only 1 image will be used.",
optional=True,
),
comfy_io.Image.Input(
IO.Image.Input(
"character_image",
tooltip="Character reference images; can be a batch of multiple, up to 4 images can be considered.",
optional=True,
),
],
outputs=[comfy_io.Image.Output()],
outputs=[IO.Image.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -240,7 +240,7 @@ class LumaImageGenerationNode(comfy_io.ComfyNode):
image_luma_ref: LumaReferenceChain = None,
style_image: torch.Tensor = None,
character_image: torch.Tensor = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=3)
auth_kwargs = {
"auth_token": cls.hidden.auth_token_comfy_org,
@ -306,7 +306,7 @@ class LumaImageGenerationNode(comfy_io.ComfyNode):
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.assets.image) as img_response:
img = process_image_response(await img_response.content.read())
return comfy_io.NodeOutput(img)
return IO.NodeOutput(img)
@classmethod
async def _convert_luma_refs(
@ -334,29 +334,29 @@ class LumaImageGenerationNode(comfy_io.ComfyNode):
return await cls._convert_luma_refs(chain, max_refs=1, auth_kwargs=auth_kwargs)
class LumaImageModifyNode(comfy_io.ComfyNode):
class LumaImageModifyNode(IO.ComfyNode):
"""
Modifies images synchronously based on prompt and aspect ratio.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="LumaImageModifyNode",
display_name="Luma Image to Image",
category="api node/image/Luma",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input(
IO.Image.Input(
"image",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
comfy_io.Float.Input(
IO.Float.Input(
"image_weight",
default=0.1,
min=0.0,
@ -364,11 +364,11 @@ class LumaImageModifyNode(comfy_io.ComfyNode):
step=0.01,
tooltip="Weight of the image; the closer to 1.0, the less the image will be modified.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=LumaImageModel,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -377,11 +377,11 @@ class LumaImageModifyNode(comfy_io.ComfyNode):
tooltip="Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
),
],
outputs=[comfy_io.Image.Output()],
outputs=[IO.Image.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -394,7 +394,7 @@ class LumaImageModifyNode(comfy_io.ComfyNode):
image: torch.Tensor,
image_weight: float,
seed,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
auth_kwargs = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
@ -442,51 +442,51 @@ class LumaImageModifyNode(comfy_io.ComfyNode):
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.assets.image) as img_response:
img = process_image_response(await img_response.content.read())
return comfy_io.NodeOutput(img)
return IO.NodeOutput(img)
class LumaTextToVideoGenerationNode(comfy_io.ComfyNode):
class LumaTextToVideoGenerationNode(IO.ComfyNode):
"""
Generates videos synchronously based on prompt and output_size.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="LumaVideoNode",
display_name="Luma Text to Video",
category="api node/video/Luma",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the video generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=LumaVideoModel,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=LumaAspectRatio,
default=LumaAspectRatio.ratio_16_9,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=LumaVideoOutputResolution,
default=LumaVideoOutputResolution.res_540p,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration",
options=LumaVideoModelOutputDuration,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"loop",
default=False,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -494,17 +494,17 @@ class LumaTextToVideoGenerationNode(comfy_io.ComfyNode):
control_after_generate=True,
tooltip="Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
),
comfy_io.Custom(LumaIO.LUMA_CONCEPTS).Input(
IO.Custom(LumaIO.LUMA_CONCEPTS).Input(
"luma_concepts",
tooltip="Optional Camera Concepts to dictate camera motion via the Luma Concepts node.",
optional=True,
)
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -520,7 +520,7 @@ class LumaTextToVideoGenerationNode(comfy_io.ComfyNode):
loop: bool,
seed,
luma_concepts: LumaConceptChain = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False, min_length=3)
duration = duration if model != LumaVideoModel.ray_1_6 else None
resolution = resolution if model != LumaVideoModel.ray_1_6 else None
@ -571,51 +571,51 @@ class LumaTextToVideoGenerationNode(comfy_io.ComfyNode):
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.assets.video) as vid_response:
return comfy_io.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
class LumaImageToVideoGenerationNode(comfy_io.ComfyNode):
class LumaImageToVideoGenerationNode(IO.ComfyNode):
"""
Generates videos synchronously based on prompt, input images, and output_size.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="LumaImageToVideoNode",
display_name="Luma Image to Video",
category="api node/video/Luma",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the video generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=LumaVideoModel,
),
# comfy_io.Combo.Input(
# IO.Combo.Input(
# "aspect_ratio",
# options=[ratio.value for ratio in LumaAspectRatio],
# default=LumaAspectRatio.ratio_16_9,
# ),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=LumaVideoOutputResolution,
default=LumaVideoOutputResolution.res_540p,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration",
options=[dur.value for dur in LumaVideoModelOutputDuration],
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"loop",
default=False,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -623,27 +623,27 @@ class LumaImageToVideoGenerationNode(comfy_io.ComfyNode):
control_after_generate=True,
tooltip="Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
),
comfy_io.Image.Input(
IO.Image.Input(
"first_image",
tooltip="First frame of generated video.",
optional=True,
),
comfy_io.Image.Input(
IO.Image.Input(
"last_image",
tooltip="Last frame of generated video.",
optional=True,
),
comfy_io.Custom(LumaIO.LUMA_CONCEPTS).Input(
IO.Custom(LumaIO.LUMA_CONCEPTS).Input(
"luma_concepts",
tooltip="Optional Camera Concepts to dictate camera motion via the Luma Concepts node.",
optional=True,
)
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -660,7 +660,7 @@ class LumaImageToVideoGenerationNode(comfy_io.ComfyNode):
first_image: torch.Tensor = None,
last_image: torch.Tensor = None,
luma_concepts: LumaConceptChain = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
if first_image is None and last_image is None:
raise Exception(
"At least one of first_image and last_image requires an input."
@ -716,7 +716,7 @@ class LumaImageToVideoGenerationNode(comfy_io.ComfyNode):
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.assets.video) as vid_response:
return comfy_io.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
@classmethod
async def _convert_to_keyframes(
@ -744,7 +744,7 @@ class LumaImageToVideoGenerationNode(comfy_io.ComfyNode):
class LumaExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
LumaImageGenerationNode,
LumaImageModifyNode,

View File

@ -4,7 +4,7 @@ import logging
import torch
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api.input_impl.video_types import VideoFromFile
from comfy_api_nodes.apis import (
MinimaxVideoGenerationRequest,
@ -43,7 +43,7 @@ async def _generate_mm_video(
image: Optional[torch.Tensor] = None, # used for ImageToVideo
subject: Optional[torch.Tensor] = None, # used for SubjectToVideo
average_duration: Optional[int] = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
if image is None:
validate_string(prompt_text, field_name="prompt_text")
# upload image, if passed in
@ -133,35 +133,35 @@ async def _generate_mm_video(
error_msg = f"Failed to download video from {file_url}"
logging.error(error_msg)
raise Exception(error_msg)
return comfy_io.NodeOutput(VideoFromFile(video_io))
return IO.NodeOutput(VideoFromFile(video_io))
class MinimaxTextToVideoNode(comfy_io.ComfyNode):
class MinimaxTextToVideoNode(IO.ComfyNode):
"""
Generates videos synchronously based on a prompt, and optional parameters using MiniMax's API.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MinimaxTextToVideoNode",
display_name="MiniMax Text to Video",
category="api node/video/MiniMax",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt_text",
multiline=True,
default="",
tooltip="Text prompt to guide the video generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["T2V-01", "T2V-01-Director"],
default="T2V-01",
tooltip="Model to use for video generation",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -172,11 +172,11 @@ class MinimaxTextToVideoNode(comfy_io.ComfyNode):
optional=True,
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -187,7 +187,7 @@ class MinimaxTextToVideoNode(comfy_io.ComfyNode):
prompt_text: str,
model: str = "T2V-01",
seed: int = 0,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
return await _generate_mm_video(
auth={
"auth_token": cls.hidden.auth_token_comfy_org,
@ -203,36 +203,36 @@ class MinimaxTextToVideoNode(comfy_io.ComfyNode):
)
class MinimaxImageToVideoNode(comfy_io.ComfyNode):
class MinimaxImageToVideoNode(IO.ComfyNode):
"""
Generates videos synchronously based on an image and prompt, and optional parameters using MiniMax's API.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MinimaxImageToVideoNode",
display_name="MiniMax Image to Video",
category="api node/video/MiniMax",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="Image to use as first frame of video generation",
),
comfy_io.String.Input(
IO.String.Input(
"prompt_text",
multiline=True,
default="",
tooltip="Text prompt to guide the video generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["I2V-01-Director", "I2V-01", "I2V-01-live"],
default="I2V-01",
tooltip="Model to use for video generation",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -243,11 +243,11 @@ class MinimaxImageToVideoNode(comfy_io.ComfyNode):
optional=True,
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -259,7 +259,7 @@ class MinimaxImageToVideoNode(comfy_io.ComfyNode):
prompt_text: str,
model: str = "I2V-01",
seed: int = 0,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
return await _generate_mm_video(
auth={
"auth_token": cls.hidden.auth_token_comfy_org,
@ -275,36 +275,36 @@ class MinimaxImageToVideoNode(comfy_io.ComfyNode):
)
class MinimaxSubjectToVideoNode(comfy_io.ComfyNode):
class MinimaxSubjectToVideoNode(IO.ComfyNode):
"""
    Generates videos synchronously based on a subject reference image and prompt, and optional parameters using MiniMax's API.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MinimaxSubjectToVideoNode",
display_name="MiniMax Subject to Video",
category="api node/video/MiniMax",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input(
IO.Image.Input(
"subject",
tooltip="Image of subject to reference for video generation",
),
comfy_io.String.Input(
IO.String.Input(
"prompt_text",
multiline=True,
default="",
tooltip="Text prompt to guide the video generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["S2V-01"],
default="S2V-01",
tooltip="Model to use for video generation",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -315,11 +315,11 @@ class MinimaxSubjectToVideoNode(comfy_io.ComfyNode):
optional=True,
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -331,7 +331,7 @@ class MinimaxSubjectToVideoNode(comfy_io.ComfyNode):
prompt_text: str,
model: str = "S2V-01",
seed: int = 0,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
return await _generate_mm_video(
auth={
"auth_token": cls.hidden.auth_token_comfy_org,
@ -347,24 +347,24 @@ class MinimaxSubjectToVideoNode(comfy_io.ComfyNode):
)
class MinimaxHailuoVideoNode(comfy_io.ComfyNode):
class MinimaxHailuoVideoNode(IO.ComfyNode):
"""Generates videos from prompt, with optional start frame using the new MiniMax Hailuo-02 model."""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MinimaxHailuoVideoNode",
display_name="MiniMax Hailuo Video",
category="api node/video/MiniMax",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt_text",
multiline=True,
default="",
tooltip="Text prompt to guide the video generation.",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -374,25 +374,25 @@ class MinimaxHailuoVideoNode(comfy_io.ComfyNode):
tooltip="The random seed used for creating the noise.",
optional=True,
),
comfy_io.Image.Input(
IO.Image.Input(
"first_frame_image",
tooltip="Optional image to use as the first frame to generate a video.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_optimizer",
default=True,
tooltip="Optimize prompt to improve generation quality when needed.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration",
options=[6, 10],
default=6,
tooltip="The length of the output video in seconds.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=["768P", "1080P"],
default="768P",
@ -400,11 +400,11 @@ class MinimaxHailuoVideoNode(comfy_io.ComfyNode):
optional=True,
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -419,7 +419,7 @@ class MinimaxHailuoVideoNode(comfy_io.ComfyNode):
duration: int = 6,
resolution: str = "768P",
model: str = "MiniMax-Hailuo-02",
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
@ -513,12 +513,12 @@ class MinimaxHailuoVideoNode(comfy_io.ComfyNode):
error_msg = f"Failed to download video from {file_url}"
logging.error(error_msg)
raise Exception(error_msg)
return comfy_io.NodeOutput(VideoFromFile(video_io))
return IO.NodeOutput(VideoFromFile(video_io))
class MinimaxExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
MinimaxTextToVideoNode,
MinimaxImageToVideoNode,

View File

@ -22,10 +22,11 @@ from comfy_api_nodes.apinode_utils import (
download_url_to_video_output,
upload_images_to_comfyapi,
upload_video_to_comfyapi,
validate_container_format_is_mp4,
)
from comfy_api.input import VideoInput
from comfy_api.latest import ComfyExtension, InputImpl, io as comfy_io
from comfy_api.latest import ComfyExtension, InputImpl, IO
import av
import io
@ -144,7 +145,7 @@ def validate_video_to_video_input(video: VideoInput) -> VideoInput:
"""
width, height = _get_video_dimensions(video)
_validate_video_dimensions(width, height)
_validate_container_format(video)
validate_container_format_is_mp4(video)
return _validate_and_trim_duration(video)
@ -177,15 +178,6 @@ def _validate_video_dimensions(width: int, height: int) -> None:
)
def _validate_container_format(video: VideoInput) -> None:
"""Validates video container format is MP4."""
container_format = video.get_container_format()
if container_format not in ["mp4", "mov,mp4,m4a,3gp,3g2,mj2"]:
raise ValueError(
f"Only MP4 container format supported. Got: {container_format}"
)
def _validate_and_trim_duration(video: VideoInput) -> VideoInput:
"""Validates video duration and trims to 5 seconds if needed."""
duration = video.get_duration()
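
The shared `validate_container_format_is_mp4` helper imported above takes over the MP4 check that the module-local `_validate_container_format` used to perform. A minimal sketch of calling it, assuming only the imports shown in this file; the wrapper function is hypothetical:

```python
from comfy_api.input import VideoInput
from comfy_api_nodes.apinode_utils import validate_container_format_is_mp4


def check_reference_video(video: VideoInput) -> VideoInput:
    # Raises if the container is not an MP4 variant, matching the behavior
    # of the removed local check.
    validate_container_format_is_mp4(video)
    return video
```
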
@ -362,25 +354,25 @@ async def get_response(
)
class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
class MoonvalleyImg2VideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MoonvalleyImg2VideoNode",
display_name="Moonvalley Marey Image to Video",
category="api node/video/Moonvalley Marey",
description="Moonvalley Marey Image to Video Node",
inputs=[
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="The reference image used to generate the video",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="<synthetic> <scene cut> gopro, bright, contrast, static, overexposed, vignette, "
@ -391,7 +383,7 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
"wobbly, weird, low quality, plastic, stock footage, video camera, boring",
tooltip="Negative prompt text",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=[
"16:9 (1920 x 1080)",
@ -404,7 +396,7 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
default="16:9 (1920 x 1080)",
tooltip="Resolution of the output video",
),
comfy_io.Float.Input(
IO.Float.Input(
"prompt_adherence",
default=4.5,
min=1.0,
@ -412,17 +404,17 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
step=1.0,
tooltip="Guidance scale for generation control",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=9,
min=0,
max=4294967295,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Random seed value",
control_after_generate=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=33,
min=1,
@ -431,11 +423,11 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
tooltip="Number of denoising steps",
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -450,7 +442,7 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
prompt_adherence: float,
seed: int,
steps: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_image_dimensions(image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH)
validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
width_height = parse_width_height_from_res(resolution)
@ -500,25 +492,25 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
task_id, auth_kwargs=auth, node_id=cls.hidden.unique_id
)
video = await download_url_to_video_output(final_response.output_url)
return comfy_io.NodeOutput(video)
return IO.NodeOutput(video)
class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
class MoonvalleyVideo2VideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MoonvalleyVideo2VideoNode",
display_name="Moonvalley Marey Video to Video",
category="api node/video/Moonvalley Marey",
description="",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="Describes the video to generate",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="<synthetic> <scene cut> gopro, bright, contrast, static, overexposed, vignette, "
@ -529,28 +521,28 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
"wobbly, weird, low quality, plastic, stock footage, video camera, boring",
tooltip="Negative prompt text",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=9,
min=0,
max=4294967295,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Random seed value",
control_after_generate=False,
),
comfy_io.Video.Input(
IO.Video.Input(
"video",
tooltip="The reference video used to generate the output video. Must be at least 5 seconds long. "
"Videos longer than 5s will be automatically trimmed. Only MP4 format supported.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"control_type",
options=["Motion Transfer", "Pose Transfer"],
default="Motion Transfer",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"motion_intensity",
default=100,
min=0,
@ -559,21 +551,21 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
tooltip="Only used if control_type is 'Motion Transfer'",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=33,
min=1,
max=100,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Number of inference steps",
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -589,7 +581,7 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
motion_intensity: Optional[int] = 100,
steps=33,
prompt_adherence=4.5,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
@ -641,24 +633,24 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
)
video = await download_url_to_video_output(final_response.output_url)
return comfy_io.NodeOutput(video)
return IO.NodeOutput(video)
class MoonvalleyTxt2VideoNode(comfy_io.ComfyNode):
class MoonvalleyTxt2VideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MoonvalleyTxt2VideoNode",
display_name="Moonvalley Marey Text to Video",
category="api node/video/Moonvalley Marey",
description="",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="<synthetic> <scene cut> gopro, bright, contrast, static, overexposed, vignette, "
@ -669,7 +661,7 @@ class MoonvalleyTxt2VideoNode(comfy_io.ComfyNode):
"wobbly, weird, low quality, plastic, stock footage, video camera, boring",
tooltip="Negative prompt text",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=[
"16:9 (1920 x 1080)",
@ -682,7 +674,7 @@ class MoonvalleyTxt2VideoNode(comfy_io.ComfyNode):
default="16:9 (1920 x 1080)",
tooltip="Resolution of the output video",
),
comfy_io.Float.Input(
IO.Float.Input(
"prompt_adherence",
default=4.0,
min=1.0,
@ -690,17 +682,17 @@ class MoonvalleyTxt2VideoNode(comfy_io.ComfyNode):
step=1.0,
tooltip="Guidance scale for generation control",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=9,
min=0,
max=4294967295,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Random seed value",
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=33,
min=1,
@ -709,11 +701,11 @@ class MoonvalleyTxt2VideoNode(comfy_io.ComfyNode):
tooltip="Inference steps",
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -727,7 +719,7 @@ class MoonvalleyTxt2VideoNode(comfy_io.ComfyNode):
prompt_adherence: float,
seed: int,
steps: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
width_height = parse_width_height_from_res(resolution)
@ -768,12 +760,12 @@ class MoonvalleyTxt2VideoNode(comfy_io.ComfyNode):
)
video = await download_url_to_video_output(final_response.output_url)
return comfy_io.NodeOutput(video)
return IO.NodeOutput(video)
class MoonvalleyExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
MoonvalleyImg2VideoNode,
MoonvalleyTxt2VideoNode,

View File

@ -12,11 +12,12 @@ from typing import Optional, TypeVar
import torch
from typing_extensions import override
from comfy_api.latest import ComfyExtension, comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api.input_impl.video_types import VideoCodec, VideoContainer, VideoInput
from comfy_api_nodes.apinode_utils import (
download_url_to_video_output,
tensor_to_bytesio,
validate_string,
)
from comfy_api_nodes.apis import pika_defs
from comfy_api_nodes.apis.client import (
@ -46,7 +47,7 @@ async def execute_task(
initial_operation: SynchronousOperation[R, pika_defs.PikaGenerateResponse],
auth_kwargs: Optional[dict[str, str]] = None,
node_id: Optional[str] = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
task_id = (await initial_operation.execute()).video_id
final_response: pika_defs.PikaVideoResponse = await PollingOperation(
poll_endpoint=ApiEndpoint(
@ -71,39 +72,39 @@ async def execute_task(
raise Exception(error_msg)
video_url = final_response.url
logging.info("Pika task %s succeeded. Video URL: %s", task_id, video_url)
return comfy_io.NodeOutput(await download_url_to_video_output(video_url))
return IO.NodeOutput(await download_url_to_video_output(video_url))
def get_base_inputs_types() -> list[comfy_io.Input]:
def get_base_inputs_types() -> list[IO.Input]:
"""Get the base required inputs types common to all Pika nodes."""
return [
comfy_io.String.Input("prompt_text", multiline=True),
comfy_io.String.Input("negative_prompt", multiline=True),
comfy_io.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True),
comfy_io.Combo.Input("resolution", options=["1080p", "720p"], default="1080p"),
comfy_io.Combo.Input("duration", options=[5, 10], default=5),
IO.String.Input("prompt_text", multiline=True),
IO.String.Input("negative_prompt", multiline=True),
IO.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True),
IO.Combo.Input("resolution", options=["1080p", "720p"], default="1080p"),
IO.Combo.Input("duration", options=[5, 10], default=5),
]
class PikaImageToVideo(comfy_io.ComfyNode):
class PikaImageToVideo(IO.ComfyNode):
"""Pika 2.2 Image to Video Node."""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="PikaImageToVideoNode2_2",
display_name="Pika Image to Video",
description="Sends an image and prompt to the Pika API v2.2 to generate a video.",
category="api node/video/Pika",
inputs=[
comfy_io.Image.Input("image", tooltip="The image to convert to video"),
IO.Image.Input("image", tooltip="The image to convert to video"),
*get_base_inputs_types(),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -117,7 +118,7 @@ class PikaImageToVideo(comfy_io.ComfyNode):
seed: int,
resolution: str,
duration: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
image_bytes_io = tensor_to_bytesio(image)
pika_files = {"image": ("image.png", image_bytes_io, "image/png")}
pika_request_data = pika_defs.PikaBodyGenerate22I2vGenerate22I2vPost(
@ -146,19 +147,19 @@ class PikaImageToVideo(comfy_io.ComfyNode):
return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
class PikaTextToVideoNode(comfy_io.ComfyNode):
class PikaTextToVideoNode(IO.ComfyNode):
"""Pika Text2Video v2.2 Node."""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="PikaTextToVideoNode2_2",
display_name="Pika Text to Video",
description="Sends a text prompt to the Pika API v2.2 to generate a video.",
category="api node/video/Pika",
inputs=[
*get_base_inputs_types(),
comfy_io.Float.Input(
IO.Float.Input(
"aspect_ratio",
step=0.001,
min=0.4,
@ -167,11 +168,11 @@ class PikaTextToVideoNode(comfy_io.ComfyNode):
tooltip="Aspect ratio (width / height)",
)
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -185,7 +186,7 @@ class PikaTextToVideoNode(comfy_io.ComfyNode):
resolution: str,
duration: int,
aspect_ratio: float,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
@ -211,24 +212,24 @@ class PikaTextToVideoNode(comfy_io.ComfyNode):
return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
class PikaScenes(comfy_io.ComfyNode):
class PikaScenes(IO.ComfyNode):
"""PikaScenes v2.2 Node."""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="PikaScenesV2_2",
display_name="Pika Scenes (Video Image Composition)",
description="Combine your images to create a video with the objects in them. Upload multiple images as ingredients and generate a high-quality video that incorporates all of them.",
category="api node/video/Pika",
inputs=[
*get_base_inputs_types(),
comfy_io.Combo.Input(
IO.Combo.Input(
"ingredients_mode",
options=["creative", "precise"],
default="creative",
),
comfy_io.Float.Input(
IO.Float.Input(
"aspect_ratio",
step=0.001,
min=0.4,
@ -236,37 +237,37 @@ class PikaScenes(comfy_io.ComfyNode):
default=1.7777777777777777,
tooltip="Aspect ratio (width / height)",
),
comfy_io.Image.Input(
IO.Image.Input(
"image_ingredient_1",
optional=True,
tooltip="Image that will be used as ingredient to create a video.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image_ingredient_2",
optional=True,
tooltip="Image that will be used as ingredient to create a video.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image_ingredient_3",
optional=True,
tooltip="Image that will be used as ingredient to create a video.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image_ingredient_4",
optional=True,
tooltip="Image that will be used as ingredient to create a video.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image_ingredient_5",
optional=True,
tooltip="Image that will be used as ingredient to create a video.",
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -286,7 +287,7 @@ class PikaScenes(comfy_io.ComfyNode):
image_ingredient_3: Optional[torch.Tensor] = None,
image_ingredient_4: Optional[torch.Tensor] = None,
image_ingredient_5: Optional[torch.Tensor] = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
all_image_bytes_io = []
for image in [
image_ingredient_1,
@ -332,33 +333,33 @@ class PikaScenes(comfy_io.ComfyNode):
return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
class PikAdditionsNode(comfy_io.ComfyNode):
class PikAdditionsNode(IO.ComfyNode):
"""Pika Pikadditions Node. Add an image into a video."""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="Pikadditions",
display_name="Pikadditions (Video Object Insertion)",
description="Add any object or image into your video. Upload a video and specify what you'd like to add to create a seamlessly integrated result.",
category="api node/video/Pika",
inputs=[
comfy_io.Video.Input("video", tooltip="The video to add an image to."),
comfy_io.Image.Input("image", tooltip="The image to add to the video."),
comfy_io.String.Input("prompt_text", multiline=True),
comfy_io.String.Input("negative_prompt", multiline=True),
comfy_io.Int.Input(
IO.Video.Input("video", tooltip="The video to add an image to."),
IO.Image.Input("image", tooltip="The image to add to the video."),
IO.String.Input("prompt_text", multiline=True),
IO.String.Input("negative_prompt", multiline=True),
IO.Int.Input(
"seed",
min=0,
max=0xFFFFFFFF,
control_after_generate=True,
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -371,7 +372,7 @@ class PikAdditionsNode(comfy_io.ComfyNode):
prompt_text: str,
negative_prompt: str,
seed: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
video_bytes_io = BytesIO()
video.save_to(video_bytes_io, format=VideoContainer.MP4, codec=VideoCodec.H264)
video_bytes_io.seek(0)
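
Several Pika nodes reuse the same upload step shown above: serialize the incoming video to an in-memory MP4, rewind the buffer, and attach it as a multipart file. A minimal sketch, assuming the `VideoInput` API used in this file; the helper name and multipart field name are hypothetical:

```python
from io import BytesIO

from comfy_api.input_impl.video_types import VideoCodec, VideoContainer, VideoInput


def video_to_mp4_part(video: VideoInput) -> tuple:
    """Hypothetical helper: serialize a video into an in-memory MP4 multipart entry."""
    video_bytes_io = BytesIO()
    video.save_to(video_bytes_io, format=VideoContainer.MP4, codec=VideoCodec.H264)
    video_bytes_io.seek(0)
    # The field and file names are illustrative; each node builds its own payload.
    return ("video", ("video.mp4", video_bytes_io, "video/mp4"))
```
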
@ -406,43 +407,43 @@ class PikAdditionsNode(comfy_io.ComfyNode):
return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
class PikaSwapsNode(comfy_io.ComfyNode):
class PikaSwapsNode(IO.ComfyNode):
"""Pika Pikaswaps Node."""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="Pikaswaps",
display_name="Pika Swaps (Video Object Replacement)",
description="Swap out any object or region of your video with a new image or object. Define areas to replace either with a mask or coordinates.",
category="api node/video/Pika",
inputs=[
comfy_io.Video.Input("video", tooltip="The video to swap an object in."),
comfy_io.Image.Input(
IO.Video.Input("video", tooltip="The video to swap an object in."),
IO.Image.Input(
"image",
tooltip="The image used to replace the masked object in the video.",
optional=True,
),
comfy_io.Mask.Input(
IO.Mask.Input(
"mask",
tooltip="Use the mask to define areas in the video to replace.",
optional=True,
),
comfy_io.String.Input("prompt_text", multiline=True, optional=True),
comfy_io.String.Input("negative_prompt", multiline=True, optional=True),
comfy_io.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True, optional=True),
comfy_io.String.Input(
IO.String.Input("prompt_text", multiline=True, optional=True),
IO.String.Input("negative_prompt", multiline=True, optional=True),
IO.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True, optional=True),
IO.String.Input(
"region_to_modify",
multiline=True,
optional=True,
tooltip="Plaintext description of the object / region to modify.",
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -457,7 +458,7 @@ class PikaSwapsNode(comfy_io.ComfyNode):
negative_prompt: str = "",
seed: int = 0,
region_to_modify: str = "",
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
video_bytes_io = BytesIO()
video.save_to(video_bytes_io, format=VideoContainer.MP4, codec=VideoCodec.H264)
video_bytes_io.seek(0)
@ -494,30 +495,30 @@ class PikaSwapsNode(comfy_io.ComfyNode):
return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
class PikaffectsNode(comfy_io.ComfyNode):
class PikaffectsNode(IO.ComfyNode):
"""Pika Pikaffects Node."""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="Pikaffects",
display_name="Pikaffects (Video Effects)",
description="Generate a video with a specific Pikaffect. Supported Pikaffects: Cake-ify, Crumble, Crush, Decapitate, Deflate, Dissolve, Explode, Eye-pop, Inflate, Levitate, Melt, Peel, Poke, Squish, Ta-da, Tear",
category="api node/video/Pika",
inputs=[
comfy_io.Image.Input("image", tooltip="The reference image to apply the Pikaffect to."),
comfy_io.Combo.Input(
IO.Image.Input("image", tooltip="The reference image to apply the Pikaffect to."),
IO.Combo.Input(
"pikaffect", options=pika_defs.Pikaffect, default="Cake-ify"
),
comfy_io.String.Input("prompt_text", multiline=True),
comfy_io.String.Input("negative_prompt", multiline=True),
comfy_io.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True),
IO.String.Input("prompt_text", multiline=True),
IO.String.Input("negative_prompt", multiline=True),
IO.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -530,7 +531,7 @@ class PikaffectsNode(comfy_io.ComfyNode):
prompt_text: str,
negative_prompt: str,
seed: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
@ -555,26 +556,26 @@ class PikaffectsNode(comfy_io.ComfyNode):
return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
class PikaStartEndFrameNode(comfy_io.ComfyNode):
class PikaStartEndFrameNode(IO.ComfyNode):
"""PikaFrames v2.2 Node."""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="PikaStartEndFrameNode2_2",
display_name="Pika Start and End Frame to Video",
description="Generate a video by combining your first and last frame. Upload two images to define the start and end points, and let the AI create a smooth transition between them.",
category="api node/video/Pika",
inputs=[
comfy_io.Image.Input("image_start", tooltip="The first image to combine."),
comfy_io.Image.Input("image_end", tooltip="The last image to combine."),
IO.Image.Input("image_start", tooltip="The first image to combine."),
IO.Image.Input("image_end", tooltip="The last image to combine."),
*get_base_inputs_types(),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -589,7 +590,8 @@ class PikaStartEndFrameNode(comfy_io.ComfyNode):
seed: int,
resolution: str,
duration: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt_text, field_name="prompt_text", min_length=1)
pika_files = [
("keyFrames", ("image_start.png", tensor_to_bytesio(image_start), "image/png")),
("keyFrames", ("image_end.png", tensor_to_bytesio(image_end), "image/png")),
@ -621,7 +623,7 @@ class PikaStartEndFrameNode(comfy_io.ComfyNode):
class PikaApiNodesExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
PikaImageToVideo,
PikaTextToVideoNode,

View File

@ -29,7 +29,7 @@ from comfy_api_nodes.apinode_utils import (
validate_string,
)
from comfy_api.input_impl import VideoFromFile
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
import torch
import aiohttp
@ -73,69 +73,69 @@ async def upload_image_to_pixverse(image: torch.Tensor, auth_kwargs=None):
return response_upload.Resp.img_id
class PixverseTemplateNode(comfy_io.ComfyNode):
class PixverseTemplateNode(IO.ComfyNode):
"""
Select template for PixVerse Video generation.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="PixverseTemplateNode",
display_name="PixVerse Template",
category="api node/video/PixVerse",
inputs=[
comfy_io.Combo.Input("template", options=list(pixverse_templates.keys())),
IO.Combo.Input("template", options=list(pixverse_templates.keys())),
],
outputs=[comfy_io.Custom(PixverseIO.TEMPLATE).Output(display_name="pixverse_template")],
outputs=[IO.Custom(PixverseIO.TEMPLATE).Output(display_name="pixverse_template")],
)
@classmethod
def execute(cls, template: str) -> comfy_io.NodeOutput:
def execute(cls, template: str) -> IO.NodeOutput:
template_id = pixverse_templates.get(template, None)
if template_id is None:
raise Exception(f"Template '{template}' is not recognized.")
# just return the integer
return comfy_io.NodeOutput(template_id)
return IO.NodeOutput(template_id)
class PixverseTextToVideoNode(comfy_io.ComfyNode):
class PixverseTextToVideoNode(IO.ComfyNode):
"""
Generates videos based on prompt and output_size.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="PixverseTextToVideoNode",
display_name="PixVerse Text to Video",
category="api node/video/PixVerse",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the video generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=PixverseAspectRatio,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"quality",
options=PixverseQuality,
default=PixverseQuality.res_540p,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration_seconds",
options=PixverseDuration,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"motion_mode",
options=PixverseMotionMode,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -143,24 +143,24 @@ class PixverseTextToVideoNode(comfy_io.ComfyNode):
control_after_generate=True,
tooltip="Seed for video generation.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
default="",
multiline=True,
tooltip="An optional text description of undesired elements on an image.",
optional=True,
),
comfy_io.Custom(PixverseIO.TEMPLATE).Input(
IO.Custom(PixverseIO.TEMPLATE).Input(
"pixverse_template",
tooltip="An optional template to influence style of generation, created by the PixVerse Template node.",
optional=True,
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -176,7 +176,7 @@ class PixverseTextToVideoNode(comfy_io.ComfyNode):
seed,
negative_prompt: str = None,
pixverse_template: int = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
# 1080p is limited to 5 seconds duration
# only normal motion_mode supported for 1080p or for non-5 second duration
@ -237,43 +237,43 @@ class PixverseTextToVideoNode(comfy_io.ComfyNode):
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.Resp.url) as vid_response:
return comfy_io.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
class PixverseImageToVideoNode(comfy_io.ComfyNode):
class PixverseImageToVideoNode(IO.ComfyNode):
"""
    Generates videos based on an input image, prompt, and output_size.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="PixverseImageToVideoNode",
display_name="PixVerse Image to Video",
category="api node/video/PixVerse",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("image"),
comfy_io.String.Input(
IO.Image.Input("image"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the video generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"quality",
options=PixverseQuality,
default=PixverseQuality.res_540p,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration_seconds",
options=PixverseDuration,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"motion_mode",
options=PixverseMotionMode,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -281,24 +281,24 @@ class PixverseImageToVideoNode(comfy_io.ComfyNode):
control_after_generate=True,
tooltip="Seed for video generation.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
default="",
multiline=True,
tooltip="An optional text description of undesired elements on an image.",
optional=True,
),
comfy_io.Custom(PixverseIO.TEMPLATE).Input(
IO.Custom(PixverseIO.TEMPLATE).Input(
"pixverse_template",
tooltip="An optional template to influence style of generation, created by the PixVerse Template node.",
optional=True,
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -314,7 +314,7 @@ class PixverseImageToVideoNode(comfy_io.ComfyNode):
seed,
negative_prompt: str = None,
pixverse_template: int = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
@ -377,44 +377,44 @@ class PixverseImageToVideoNode(comfy_io.ComfyNode):
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.Resp.url) as vid_response:
return comfy_io.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
class PixverseTransitionVideoNode(comfy_io.ComfyNode):
class PixverseTransitionVideoNode(IO.ComfyNode):
"""
    Generates transition videos between a first and last frame based on prompt and output_size.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="PixverseTransitionVideoNode",
display_name="PixVerse Transition Video",
category="api node/video/PixVerse",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("first_frame"),
comfy_io.Image.Input("last_frame"),
comfy_io.String.Input(
IO.Image.Input("first_frame"),
IO.Image.Input("last_frame"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the video generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"quality",
options=PixverseQuality,
default=PixverseQuality.res_540p,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration_seconds",
options=PixverseDuration,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"motion_mode",
options=PixverseMotionMode,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -422,7 +422,7 @@ class PixverseTransitionVideoNode(comfy_io.ComfyNode):
control_after_generate=True,
tooltip="Seed for video generation.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
default="",
multiline=True,
@ -430,11 +430,11 @@ class PixverseTransitionVideoNode(comfy_io.ComfyNode):
optional=True,
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -450,7 +450,7 @@ class PixverseTransitionVideoNode(comfy_io.ComfyNode):
motion_mode: str,
seed,
negative_prompt: str = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
@ -514,12 +514,12 @@ class PixverseTransitionVideoNode(comfy_io.ComfyNode):
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.Resp.url) as vid_response:
return comfy_io.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
class PixVerseExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
PixverseTextToVideoNode,
PixverseImageToVideoNode,

View File

@ -32,20 +32,20 @@ from comfy_api_nodes.apis.client import (
SynchronousOperation,
PollingOperation,
)
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
COMMON_PARAMETERS = [
comfy_io.Int.Input(
IO.Int.Input(
"Seed",
default=0,
min=0,
max=65535,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
comfy_io.Combo.Input("Material_Type", options=["PBR", "Shaded"], default="PBR", optional=True),
comfy_io.Combo.Input(
IO.Combo.Input("Material_Type", options=["PBR", "Shaded"], default="PBR", optional=True),
IO.Combo.Input(
"Polygon_count",
options=["4K-Quad", "8K-Quad", "18K-Quad", "50K-Quad", "200K-Triangle"],
default="18K-Quad",
@ -259,24 +259,24 @@ async def download_files(url_list, task_uuid):
return model_file_path
class Rodin3D_Regular(comfy_io.ComfyNode):
class Rodin3D_Regular(IO.ComfyNode):
"""Generate 3D Assets using Rodin API"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="Rodin3D_Regular",
display_name="Rodin 3D Generate - Regular Generate",
category="api node/3d/Rodin",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("Images"),
IO.Image.Input("Images"),
*COMMON_PARAMETERS,
],
outputs=[comfy_io.String.Output(display_name="3D Model Path")],
outputs=[IO.String.Output(display_name="3D Model Path")],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
],
is_api_node=True,
)
@ -288,7 +288,7 @@ class Rodin3D_Regular(comfy_io.ComfyNode):
Seed,
Material_Type,
Polygon_count,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
tier = "Regular"
num_images = Images.shape[0]
m_images = []
@ -312,27 +312,27 @@ class Rodin3D_Regular(comfy_io.ComfyNode):
download_list = await get_rodin_download_list(task_uuid, auth_kwargs=auth)
model = await download_files(download_list, task_uuid)
return comfy_io.NodeOutput(model)
return IO.NodeOutput(model)
class Rodin3D_Detail(comfy_io.ComfyNode):
class Rodin3D_Detail(IO.ComfyNode):
"""Generate 3D Assets using Rodin API"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="Rodin3D_Detail",
display_name="Rodin 3D Generate - Detail Generate",
category="api node/3d/Rodin",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("Images"),
IO.Image.Input("Images"),
*COMMON_PARAMETERS,
],
outputs=[comfy_io.String.Output(display_name="3D Model Path")],
outputs=[IO.String.Output(display_name="3D Model Path")],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
],
is_api_node=True,
)
@ -344,7 +344,7 @@ class Rodin3D_Detail(comfy_io.ComfyNode):
Seed,
Material_Type,
Polygon_count,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
tier = "Detail"
num_images = Images.shape[0]
m_images = []
@ -368,27 +368,27 @@ class Rodin3D_Detail(comfy_io.ComfyNode):
download_list = await get_rodin_download_list(task_uuid, auth_kwargs=auth)
model = await download_files(download_list, task_uuid)
return comfy_io.NodeOutput(model)
return IO.NodeOutput(model)
class Rodin3D_Smooth(comfy_io.ComfyNode):
class Rodin3D_Smooth(IO.ComfyNode):
"""Generate 3D Assets using Rodin API"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="Rodin3D_Smooth",
display_name="Rodin 3D Generate - Smooth Generate",
category="api node/3d/Rodin",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("Images"),
IO.Image.Input("Images"),
*COMMON_PARAMETERS,
],
outputs=[comfy_io.String.Output(display_name="3D Model Path")],
outputs=[IO.String.Output(display_name="3D Model Path")],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
],
is_api_node=True,
)
@ -400,7 +400,7 @@ class Rodin3D_Smooth(comfy_io.ComfyNode):
Seed,
Material_Type,
Polygon_count,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
tier = "Smooth"
num_images = Images.shape[0]
m_images = []
@ -424,34 +424,34 @@ class Rodin3D_Smooth(comfy_io.ComfyNode):
download_list = await get_rodin_download_list(task_uuid, auth_kwargs=auth)
model = await download_files(download_list, task_uuid)
return comfy_io.NodeOutput(model)
return IO.NodeOutput(model)
class Rodin3D_Sketch(comfy_io.ComfyNode):
class Rodin3D_Sketch(IO.ComfyNode):
"""Generate 3D Assets using Rodin API"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="Rodin3D_Sketch",
display_name="Rodin 3D Generate - Sketch Generate",
category="api node/3d/Rodin",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("Images"),
comfy_io.Int.Input(
IO.Image.Input("Images"),
IO.Int.Input(
"Seed",
default=0,
min=0,
max=65535,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
],
outputs=[comfy_io.String.Output(display_name="3D Model Path")],
outputs=[IO.String.Output(display_name="3D Model Path")],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
],
is_api_node=True,
)
@ -461,7 +461,7 @@ class Rodin3D_Sketch(comfy_io.ComfyNode):
cls,
Images,
Seed,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
tier = "Sketch"
num_images = Images.shape[0]
m_images = []
@ -487,42 +487,42 @@ class Rodin3D_Sketch(comfy_io.ComfyNode):
download_list = await get_rodin_download_list(task_uuid, auth_kwargs=auth)
model = await download_files(download_list, task_uuid)
return comfy_io.NodeOutput(model)
return IO.NodeOutput(model)
class Rodin3D_Gen2(comfy_io.ComfyNode):
class Rodin3D_Gen2(IO.ComfyNode):
"""Generate 3D Assets using Rodin API"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="Rodin3D_Gen2",
display_name="Rodin 3D Generate - Gen-2 Generate",
category="api node/3d/Rodin",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("Images"),
comfy_io.Int.Input(
IO.Image.Input("Images"),
IO.Int.Input(
"Seed",
default=0,
min=0,
max=65535,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
comfy_io.Combo.Input("Material_Type", options=["PBR", "Shaded"], default="PBR", optional=True),
comfy_io.Combo.Input(
IO.Combo.Input("Material_Type", options=["PBR", "Shaded"], default="PBR", optional=True),
IO.Combo.Input(
"Polygon_count",
options=["4K-Quad", "8K-Quad", "18K-Quad", "50K-Quad", "2K-Triangle", "20K-Triangle", "150K-Triangle", "500K-Triangle"],
default="500K-Triangle",
optional=True,
),
comfy_io.Boolean.Input("TAPose", default=False),
IO.Boolean.Input("TAPose", default=False),
],
outputs=[comfy_io.String.Output(display_name="3D Model Path")],
outputs=[IO.String.Output(display_name="3D Model Path")],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
],
is_api_node=True,
)
@ -535,7 +535,7 @@ class Rodin3D_Gen2(comfy_io.ComfyNode):
Material_Type,
Polygon_count,
TAPose,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
tier = "Gen-2"
num_images = Images.shape[0]
m_images = []
@ -560,12 +560,12 @@ class Rodin3D_Gen2(comfy_io.ComfyNode):
download_list = await get_rodin_download_list(task_uuid, auth_kwargs=auth)
model = await download_files(download_list, task_uuid)
return comfy_io.NodeOutput(model)
return IO.NodeOutput(model)
class Rodin3DExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
Rodin3D_Regular,
Rodin3D_Detail,

View File

@ -48,7 +48,7 @@ from comfy_api_nodes.apinode_utils import (
download_url_to_image_tensor,
)
from comfy_api.input_impl import VideoFromFile
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api_nodes.util.validation_utils import validate_image_dimensions, validate_image_aspect_ratio
PATH_IMAGE_TO_VIDEO = "/proxy/runway/image_to_video"
@ -175,11 +175,11 @@ async def generate_video(
return await download_url_to_video_output(video_url)
class RunwayImageToVideoNodeGen3a(comfy_io.ComfyNode):
class RunwayImageToVideoNodeGen3a(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="RunwayImageToVideoNodeGen3a",
display_name="Runway Image to Video (Gen3a Turbo)",
category="api node/video/Runway",
@ -188,42 +188,42 @@ class RunwayImageToVideoNodeGen3a(comfy_io.ComfyNode):
"your input selections will set your generation up for success: "
"https://help.runwayml.com/hc/en-us/articles/33927968552339-Creating-with-Act-One-on-Gen-3-Alpha-and-Turbo.",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text prompt for the generation",
),
comfy_io.Image.Input(
IO.Image.Input(
"start_frame",
tooltip="Start frame to be used for the video",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration",
options=Duration,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"ratio",
options=RunwayGen3aAspectRatio,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967295,
step=1,
control_after_generate=True,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Random seed for generation",
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -236,7 +236,7 @@ class RunwayImageToVideoNodeGen3a(comfy_io.ComfyNode):
duration: str,
ratio: str,
seed: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, min_length=1)
validate_image_dimensions(start_frame, max_width=7999, max_height=7999)
validate_image_aspect_ratio(start_frame, min_aspect_ratio=0.5, max_aspect_ratio=2.0)
@ -253,7 +253,7 @@ class RunwayImageToVideoNodeGen3a(comfy_io.ComfyNode):
auth_kwargs=auth_kwargs,
)
return comfy_io.NodeOutput(
return IO.NodeOutput(
await generate_video(
RunwayImageToVideoRequest(
promptText=prompt,
@ -275,11 +275,11 @@ class RunwayImageToVideoNodeGen3a(comfy_io.ComfyNode):
)
class RunwayImageToVideoNodeGen4(comfy_io.ComfyNode):
class RunwayImageToVideoNodeGen4(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="RunwayImageToVideoNodeGen4",
display_name="Runway Image to Video (Gen4 Turbo)",
category="api node/video/Runway",
@ -288,42 +288,42 @@ class RunwayImageToVideoNodeGen4(comfy_io.ComfyNode):
"your input selections will set your generation up for success: "
"https://help.runwayml.com/hc/en-us/articles/37327109429011-Creating-with-Gen-4-Video.",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text prompt for the generation",
),
comfy_io.Image.Input(
IO.Image.Input(
"start_frame",
tooltip="Start frame to be used for the video",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration",
options=Duration,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"ratio",
options=RunwayGen4TurboAspectRatio,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967295,
step=1,
control_after_generate=True,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Random seed for generation",
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -336,7 +336,7 @@ class RunwayImageToVideoNodeGen4(comfy_io.ComfyNode):
duration: str,
ratio: str,
seed: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, min_length=1)
validate_image_dimensions(start_frame, max_width=7999, max_height=7999)
validate_image_aspect_ratio(start_frame, min_aspect_ratio=0.5, max_aspect_ratio=2.0)
@ -353,7 +353,7 @@ class RunwayImageToVideoNodeGen4(comfy_io.ComfyNode):
auth_kwargs=auth_kwargs,
)
return comfy_io.NodeOutput(
return IO.NodeOutput(
await generate_video(
RunwayImageToVideoRequest(
promptText=prompt,
@ -376,11 +376,11 @@ class RunwayImageToVideoNodeGen4(comfy_io.ComfyNode):
)
class RunwayFirstLastFrameNode(comfy_io.ComfyNode):
class RunwayFirstLastFrameNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="RunwayFirstLastFrameNode",
display_name="Runway First-Last-Frame to Video",
category="api node/video/Runway",
@ -392,46 +392,46 @@ class RunwayFirstLastFrameNode(comfy_io.ComfyNode):
"will set your generation up for success: "
"https://help.runwayml.com/hc/en-us/articles/34170748696595-Creating-with-Keyframes-on-Gen-3.",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text prompt for the generation",
),
comfy_io.Image.Input(
IO.Image.Input(
"start_frame",
tooltip="Start frame to be used for the video",
),
comfy_io.Image.Input(
IO.Image.Input(
"end_frame",
tooltip="End frame to be used for the video. Supported for gen3a_turbo only.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration",
options=Duration,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"ratio",
options=RunwayGen3aAspectRatio,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967295,
step=1,
control_after_generate=True,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Random seed for generation",
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -445,7 +445,7 @@ class RunwayFirstLastFrameNode(comfy_io.ComfyNode):
duration: str,
ratio: str,
seed: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, min_length=1)
validate_image_dimensions(start_frame, max_width=7999, max_height=7999)
validate_image_dimensions(end_frame, max_width=7999, max_height=7999)
@ -467,7 +467,7 @@ class RunwayFirstLastFrameNode(comfy_io.ComfyNode):
if len(download_urls) != 2:
raise RunwayApiError("Failed to upload one or more images to comfy api.")
return comfy_io.NodeOutput(
return IO.NodeOutput(
await generate_video(
RunwayImageToVideoRequest(
promptText=prompt,
@ -493,40 +493,40 @@ class RunwayFirstLastFrameNode(comfy_io.ComfyNode):
)
class RunwayTextToImageNode(comfy_io.ComfyNode):
class RunwayTextToImageNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="RunwayTextToImageNode",
display_name="Runway Text to Image",
category="api node/image/Runway",
description="Generate an image from a text prompt using Runway's Gen 4 model. "
"You can also include reference image to guide the generation.",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text prompt for the generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"ratio",
options=[model.value for model in RunwayTextToImageAspectRatioEnum],
),
comfy_io.Image.Input(
IO.Image.Input(
"reference_image",
tooltip="Optional reference image to guide the generation",
optional=True,
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -537,7 +537,7 @@ class RunwayTextToImageNode(comfy_io.ComfyNode):
prompt: str,
ratio: str,
reference_image: Optional[torch.Tensor] = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, min_length=1)
auth_kwargs = {
@ -588,12 +588,12 @@ class RunwayTextToImageNode(comfy_io.ComfyNode):
if not final_response.output:
raise RunwayApiError("Runway task succeeded but no image data found in response.")
return comfy_io.NodeOutput(await download_url_to_image_tensor(get_image_url_from_task_status(final_response)))
return IO.NodeOutput(await download_url_to_image_tensor(get_image_url_from_task_status(final_response)))
class RunwayExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
RunwayFirstLastFrameNode,
RunwayImageToVideoNodeGen3a,

View File

@ -3,7 +3,7 @@ from typing_extensions import override
import torch
from pydantic import BaseModel, Field
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api_nodes.apis.client import (
ApiEndpoint,
HttpMethod,
@ -31,27 +31,27 @@ class Sora2GenerationResponse(BaseModel):
status: Optional[str] = Field(None)
class OpenAIVideoSora2(comfy_io.ComfyNode):
class OpenAIVideoSora2(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="OpenAIVideoSora2",
display_name="OpenAI Sora - Video",
category="api node/video/Sora",
description="OpenAI video and audio generation.",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["sora-2", "sora-2-pro"],
default="sora-2",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Guiding text; may be empty if an input image is present.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"size",
options=[
"720x1280",
@ -61,22 +61,22 @@ class OpenAIVideoSora2(comfy_io.ComfyNode):
],
default="1280x720",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration",
options=[4, 8, 12],
default=8,
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
optional=True,
tooltip="Seed to determine if node should re-run; "
@ -84,12 +84,12 @@ class OpenAIVideoSora2(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -155,7 +155,7 @@ class OpenAIVideoSora2(comfy_io.ComfyNode):
estimated_duration=45 * (duration / 4) * model_time_multiplier,
)
await poll_operation.execute()
return comfy_io.NodeOutput(
return IO.NodeOutput(
await download_url_to_video_output(
f"/proxy/openai/v1/videos/{initial_response.id}/content",
auth_kwargs=auth,
@ -165,7 +165,7 @@ class OpenAIVideoSora2(comfy_io.ComfyNode):
class OpenAISoraExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
OpenAIVideoSora2,
]

View File

@ -2,7 +2,7 @@ from inspect import cleandoc
from typing import Optional
from typing_extensions import override
from comfy_api.latest import ComfyExtension, Input, io as comfy_io
from comfy_api.latest import ComfyExtension, Input, IO
from comfy_api_nodes.apis.stability_api import (
StabilityUpscaleConservativeRequest,
StabilityUpscaleCreativeRequest,
@ -56,20 +56,20 @@ def get_async_dummy_status(x: StabilityResultsGetResponse):
return StabilityPollStatus.in_progress
class StabilityStableImageUltraNode(comfy_io.ComfyNode):
class StabilityStableImageUltraNode(IO.ComfyNode):
"""
Generates images synchronously based on prompt and resolution.
"""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityStableImageUltraNode",
display_name="Stability AI Stable Image Ultra",
category="api node/image/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
@ -80,39 +80,39 @@ class StabilityStableImageUltraNode(comfy_io.ComfyNode):
"is a value between 0 and 1. For example: `The sky was a crisp (blue:0.3) and (green:0.8)`" +
"would convey a sky that was blue and green, but more green than blue.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=StabilityAspectRatio,
default=StabilityAspectRatio.ratio_1_1,
tooltip="Aspect ratio of generated image.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"style_preset",
options=get_stability_style_presets(),
tooltip="Optional desired style of generated image.",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for creating the noise.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
optional=True,
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
default="",
tooltip="A blurb of text describing what you do not wish to see in the output image. This is an advanced feature.",
force_input=True,
optional=True,
),
comfy_io.Float.Input(
IO.Float.Input(
"image_denoise",
default=0.5,
min=0.0,
@ -123,12 +123,12 @@ class StabilityStableImageUltraNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -143,7 +143,7 @@ class StabilityStableImageUltraNode(comfy_io.ComfyNode):
image: Optional[torch.Tensor] = None,
negative_prompt: str = "",
image_denoise: Optional[float] = 0.5,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
# prepare image binary if image present
image_binary = None
@ -193,44 +193,44 @@ class StabilityStableImageUltraNode(comfy_io.ComfyNode):
image_data = base64.b64decode(response_api.image)
returned_image = bytesio_to_image_tensor(BytesIO(image_data))
return comfy_io.NodeOutput(returned_image)
return IO.NodeOutput(returned_image)
class StabilityStableImageSD_3_5Node(comfy_io.ComfyNode):
class StabilityStableImageSD_3_5Node(IO.ComfyNode):
"""
Generates images synchronously based on prompt and resolution.
"""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityStableImageSD_3_5Node",
display_name="Stability AI Stable Diffusion 3.5 Image",
category="api node/image/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="What you wish to see in the output image. A strong, descriptive prompt that clearly defines elements, colors, and subjects will lead to better results.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=Stability_SD3_5_Model,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=StabilityAspectRatio,
default=StabilityAspectRatio.ratio_1_1,
tooltip="Aspect ratio of generated image.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"style_preset",
options=get_stability_style_presets(),
tooltip="Optional desired style of generated image.",
),
comfy_io.Float.Input(
IO.Float.Input(
"cfg_scale",
default=4.0,
min=1.0,
@ -238,28 +238,28 @@ class StabilityStableImageSD_3_5Node(comfy_io.ComfyNode):
step=0.1,
tooltip="How strictly the diffusion process adheres to the prompt text (higher values keep your image closer to your prompt)",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for creating the noise.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
optional=True,
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
default="",
tooltip="Keywords of what you do not wish to see in the output image. This is an advanced feature.",
force_input=True,
optional=True,
),
comfy_io.Float.Input(
IO.Float.Input(
"image_denoise",
default=0.5,
min=0.0,
@ -270,12 +270,12 @@ class StabilityStableImageSD_3_5Node(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -292,7 +292,7 @@ class StabilityStableImageSD_3_5Node(comfy_io.ComfyNode):
image: Optional[torch.Tensor] = None,
negative_prompt: str = "",
image_denoise: Optional[float] = 0.5,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
# prepare image binary if image present
image_binary = None
@ -348,30 +348,30 @@ class StabilityStableImageSD_3_5Node(comfy_io.ComfyNode):
image_data = base64.b64decode(response_api.image)
returned_image = bytesio_to_image_tensor(BytesIO(image_data))
return comfy_io.NodeOutput(returned_image)
return IO.NodeOutput(returned_image)
class StabilityUpscaleConservativeNode(comfy_io.ComfyNode):
class StabilityUpscaleConservativeNode(IO.ComfyNode):
"""
Upscale image with minimal alterations to 4K resolution.
"""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityUpscaleConservativeNode",
display_name="Stability AI Upscale Conservative",
category="api node/image/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("image"),
comfy_io.String.Input(
IO.Image.Input("image"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="What you wish to see in the output image. A strong, descriptive prompt that clearly defines elements, colors, and subjects will lead to better results.",
),
comfy_io.Float.Input(
IO.Float.Input(
"creativity",
default=0.35,
min=0.2,
@ -379,17 +379,17 @@ class StabilityUpscaleConservativeNode(comfy_io.ComfyNode):
step=0.01,
tooltip="Controls the likelihood of creating additional details not heavily conditioned by the init image.",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for creating the noise.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
default="",
tooltip="Keywords of what you do not wish to see in the output image. This is an advanced feature.",
@ -398,12 +398,12 @@ class StabilityUpscaleConservativeNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -416,7 +416,7 @@ class StabilityUpscaleConservativeNode(comfy_io.ComfyNode):
creativity: float,
seed: int,
negative_prompt: str = "",
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
image_binary = tensor_to_bytesio(image, total_pixels=1024*1024).read()
@ -457,30 +457,30 @@ class StabilityUpscaleConservativeNode(comfy_io.ComfyNode):
image_data = base64.b64decode(response_api.image)
returned_image = bytesio_to_image_tensor(BytesIO(image_data))
return comfy_io.NodeOutput(returned_image)
return IO.NodeOutput(returned_image)
class StabilityUpscaleCreativeNode(comfy_io.ComfyNode):
class StabilityUpscaleCreativeNode(IO.ComfyNode):
"""
Upscale image with minimal alterations to 4K resolution.
"""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityUpscaleCreativeNode",
display_name="Stability AI Upscale Creative",
category="api node/image/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("image"),
comfy_io.String.Input(
IO.Image.Input("image"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="What you wish to see in the output image. A strong, descriptive prompt that clearly defines elements, colors, and subjects will lead to better results.",
),
comfy_io.Float.Input(
IO.Float.Input(
"creativity",
default=0.3,
min=0.1,
@ -488,22 +488,22 @@ class StabilityUpscaleCreativeNode(comfy_io.ComfyNode):
step=0.01,
tooltip="Controls the likelihood of creating additional details not heavily conditioned by the init image.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"style_preset",
options=get_stability_style_presets(),
tooltip="Optional desired style of generated image.",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for creating the noise.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
default="",
tooltip="Keywords of what you do not wish to see in the output image. This is an advanced feature.",
@ -512,12 +512,12 @@ class StabilityUpscaleCreativeNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -531,7 +531,7 @@ class StabilityUpscaleCreativeNode(comfy_io.ComfyNode):
style_preset: str,
seed: int,
negative_prompt: str = "",
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
image_binary = tensor_to_bytesio(image, total_pixels=1024*1024).read()
@ -591,37 +591,37 @@ class StabilityUpscaleCreativeNode(comfy_io.ComfyNode):
image_data = base64.b64decode(response_poll.result)
returned_image = bytesio_to_image_tensor(BytesIO(image_data))
return comfy_io.NodeOutput(returned_image)
return IO.NodeOutput(returned_image)
class StabilityUpscaleFastNode(comfy_io.ComfyNode):
class StabilityUpscaleFastNode(IO.ComfyNode):
"""
Quickly upscales an image via Stability API call to 4x its original size; intended for upscaling low-quality/compressed images.
"""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityUpscaleFastNode",
display_name="Stability AI Upscale Fast",
category="api node/image/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("image"),
IO.Image.Input("image"),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
async def execute(cls, image: torch.Tensor) -> comfy_io.NodeOutput:
async def execute(cls, image: torch.Tensor) -> IO.NodeOutput:
image_binary = tensor_to_bytesio(image, total_pixels=4096*4096).read()
files = {
@ -653,26 +653,26 @@ class StabilityUpscaleFastNode(comfy_io.ComfyNode):
image_data = base64.b64decode(response_api.image)
returned_image = bytesio_to_image_tensor(BytesIO(image_data))
return comfy_io.NodeOutput(returned_image)
return IO.NodeOutput(returned_image)
class StabilityTextToAudio(comfy_io.ComfyNode):
class StabilityTextToAudio(IO.ComfyNode):
"""Generates high-quality music and sound effects from text descriptions."""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityTextToAudio",
display_name="Stability AI Text To Audio",
category="api node/audio/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["stable-audio-2.5"],
),
comfy_io.String.Input("prompt", multiline=True, default=""),
comfy_io.Int.Input(
IO.String.Input("prompt", multiline=True, default=""),
IO.Int.Input(
"duration",
default=190,
min=1,
@ -681,18 +681,18 @@ class StabilityTextToAudio(comfy_io.ComfyNode):
tooltip="Controls the duration in seconds of the generated audio.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for generation.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=8,
min=4,
@ -703,18 +703,18 @@ class StabilityTextToAudio(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Audio.Output(),
IO.Audio.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
async def execute(cls, model: str, prompt: str, duration: int, seed: int, steps: int) -> comfy_io.NodeOutput:
async def execute(cls, model: str, prompt: str, duration: int, seed: int, steps: int) -> IO.NodeOutput:
validate_string(prompt, max_length=10000)
payload = StabilityTextToAudioRequest(prompt=prompt, model=model, duration=duration, seed=seed, steps=steps)
operation = SynchronousOperation(
@ -734,27 +734,27 @@ class StabilityTextToAudio(comfy_io.ComfyNode):
response_api = await operation.execute()
if not response_api.audio:
raise ValueError("No audio file was received in response.")
return comfy_io.NodeOutput(audio_bytes_to_audio_input(base64.b64decode(response_api.audio)))
return IO.NodeOutput(audio_bytes_to_audio_input(base64.b64decode(response_api.audio)))
class StabilityAudioToAudio(comfy_io.ComfyNode):
class StabilityAudioToAudio(IO.ComfyNode):
"""Transforms existing audio samples into new high-quality compositions using text instructions."""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityAudioToAudio",
display_name="Stability AI Audio To Audio",
category="api node/audio/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["stable-audio-2.5"],
),
comfy_io.String.Input("prompt", multiline=True, default=""),
comfy_io.Audio.Input("audio", tooltip="Audio must be between 6 and 190 seconds long."),
comfy_io.Int.Input(
IO.String.Input("prompt", multiline=True, default=""),
IO.Audio.Input("audio", tooltip="Audio must be between 6 and 190 seconds long."),
IO.Int.Input(
"duration",
default=190,
min=1,
@ -763,18 +763,18 @@ class StabilityAudioToAudio(comfy_io.ComfyNode):
tooltip="Controls the duration in seconds of the generated audio.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for generation.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=8,
min=4,
@ -783,24 +783,24 @@ class StabilityAudioToAudio(comfy_io.ComfyNode):
tooltip="Controls the number of sampling steps.",
optional=True,
),
comfy_io.Float.Input(
IO.Float.Input(
"strength",
default=1,
min=0.01,
max=1.0,
step=0.01,
display_mode=comfy_io.NumberDisplay.slider,
display_mode=IO.NumberDisplay.slider,
tooltip="Parameter controls how much influence the audio parameter has on the generated audio.",
optional=True,
),
],
outputs=[
comfy_io.Audio.Output(),
IO.Audio.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -808,7 +808,7 @@ class StabilityAudioToAudio(comfy_io.ComfyNode):
@classmethod
async def execute(
cls, model: str, prompt: str, audio: Input.Audio, duration: int, seed: int, steps: int, strength: float
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, max_length=10000)
validate_audio_duration(audio, 6, 190)
payload = StabilityAudioToAudioRequest(
@ -832,27 +832,27 @@ class StabilityAudioToAudio(comfy_io.ComfyNode):
response_api = await operation.execute()
if not response_api.audio:
raise ValueError("No audio file was received in response.")
return comfy_io.NodeOutput(audio_bytes_to_audio_input(base64.b64decode(response_api.audio)))
return IO.NodeOutput(audio_bytes_to_audio_input(base64.b64decode(response_api.audio)))
class StabilityAudioInpaint(comfy_io.ComfyNode):
class StabilityAudioInpaint(IO.ComfyNode):
"""Transforms part of existing audio sample using text instructions."""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityAudioInpaint",
display_name="Stability AI Audio Inpaint",
category="api node/audio/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["stable-audio-2.5"],
),
comfy_io.String.Input("prompt", multiline=True, default=""),
comfy_io.Audio.Input("audio", tooltip="Audio must be between 6 and 190 seconds long."),
comfy_io.Int.Input(
IO.String.Input("prompt", multiline=True, default=""),
IO.Audio.Input("audio", tooltip="Audio must be between 6 and 190 seconds long."),
IO.Int.Input(
"duration",
default=190,
min=1,
@ -861,18 +861,18 @@ class StabilityAudioInpaint(comfy_io.ComfyNode):
tooltip="Controls the duration in seconds of the generated audio.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for generation.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=8,
min=4,
@ -881,7 +881,7 @@ class StabilityAudioInpaint(comfy_io.ComfyNode):
tooltip="Controls the number of sampling steps.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"mask_start",
default=30,
min=0,
@ -889,7 +889,7 @@ class StabilityAudioInpaint(comfy_io.ComfyNode):
step=1,
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"mask_end",
default=190,
min=0,
@ -899,12 +899,12 @@ class StabilityAudioInpaint(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Audio.Output(),
IO.Audio.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -920,7 +920,7 @@ class StabilityAudioInpaint(comfy_io.ComfyNode):
steps: int,
mask_start: int,
mask_end: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, max_length=10000)
if mask_end <= mask_start:
raise ValueError(f"Value of mask_end({mask_end}) should be greater then mask_start({mask_start})")
@ -953,12 +953,12 @@ class StabilityAudioInpaint(comfy_io.ComfyNode):
response_api = await operation.execute()
if not response_api.audio:
raise ValueError("No audio file was received in response.")
return comfy_io.NodeOutput(audio_bytes_to_audio_input(base64.b64decode(response_api.audio)))
return IO.NodeOutput(audio_bytes_to_audio_input(base64.b64decode(response_api.audio)))
class StabilityExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
StabilityStableImageUltraNode,
StabilityStableImageSD_3_5Node,

View File

@ -6,7 +6,7 @@ from io import BytesIO
from typing import Optional
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api.input_impl.video_types import VideoFromFile
from comfy_api_nodes.apis import (
VeoGenVidRequest,
@ -27,6 +27,13 @@ from comfy_api_nodes.apinode_utils import (
)
AVERAGE_DURATION_VIDEO_GEN = 32
MODELS_MAP = {
"veo-2.0-generate-001": "veo-2.0-generate-001",
"veo-3.1-generate": "veo-3.1-generate-preview",
"veo-3.1-fast-generate": "veo-3.1-fast-generate-preview",
"veo-3.0-generate-001": "veo-3.0-generate-001",
"veo-3.0-fast-generate-001": "veo-3.0-fast-generate-001",
}
def convert_image_to_base64(image: torch.Tensor):
if image is None:
@ -51,7 +58,7 @@ def get_video_url_from_response(poll_response: VeoGenVidPollResponse) -> Optiona
return None
class VeoVideoGenerationNode(comfy_io.ComfyNode):
class VeoVideoGenerationNode(IO.ComfyNode):
"""
Generates videos from text prompts using Google's Veo API.
@ -61,71 +68,71 @@ class VeoVideoGenerationNode(comfy_io.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="VeoVideoGenerationNode",
display_name="Google Veo 2 Video Generation",
category="api node/video/Veo",
description="Generates videos from text prompts using Google's Veo 2 API",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text description of the video",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=["16:9", "9:16"],
default="16:9",
tooltip="Aspect ratio of the output video",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Negative text prompt to guide what to avoid in the video",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"duration_seconds",
default=5,
min=5,
max=8,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"enhance_prompt",
default=True,
tooltip="Whether to enhance the prompt with AI assistance",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"person_generation",
options=["ALLOW", "BLOCK"],
default="ALLOW",
tooltip="Whether to allow generating people in the video",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=0xFFFFFFFF,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed for video generation (0 for random)",
optional=True,
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="Optional reference image to guide video generation",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["veo-2.0-generate-001"],
default="veo-2.0-generate-001",
@ -134,12 +141,12 @@ class VeoVideoGenerationNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -158,6 +165,7 @@ class VeoVideoGenerationNode(comfy_io.ComfyNode):
model="veo-2.0-generate-001",
generate_audio=False,
):
model = MODELS_MAP[model]
# Prepare the instances for the request
instances = []
@ -302,7 +310,7 @@ class VeoVideoGenerationNode(comfy_io.ComfyNode):
video_io = BytesIO(video_data)
# Return VideoFromFile object
return comfy_io.NodeOutput(VideoFromFile(video_io))
return IO.NodeOutput(VideoFromFile(video_io))
class Veo3VideoGenerationNode(VeoVideoGenerationNode):
@ -319,78 +327,80 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="Veo3VideoGenerationNode",
display_name="Google Veo 3 Video Generation",
category="api node/video/Veo",
description="Generates videos from text prompts using Google's Veo 3 API",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text description of the video",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=["16:9", "9:16"],
default="16:9",
tooltip="Aspect ratio of the output video",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Negative text prompt to guide what to avoid in the video",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"duration_seconds",
default=8,
min=8,
max=8,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds (Veo 3 only supports 8 seconds)",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"enhance_prompt",
default=True,
tooltip="Whether to enhance the prompt with AI assistance",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"person_generation",
options=["ALLOW", "BLOCK"],
default="ALLOW",
tooltip="Whether to allow generating people in the video",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=0xFFFFFFFF,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed for video generation (0 for random)",
optional=True,
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="Optional reference image to guide video generation",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["veo-3.0-generate-001", "veo-3.0-fast-generate-001"],
options=[
"veo-3.1-generate", "veo-3.1-fast-generate", "veo-3.0-generate-001", "veo-3.0-fast-generate-001"
],
default="veo-3.0-generate-001",
tooltip="Veo 3 model to use for video generation",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"generate_audio",
default=False,
tooltip="Generate audio for the video. Supported by all Veo 3 models.",
@ -398,12 +408,12 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -411,7 +421,7 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode):
class VeoExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
VeoVideoGenerationNode,
Veo3VideoGenerationNode,

View File

@ -6,7 +6,7 @@ from typing_extensions import override
import torch
from pydantic import BaseModel, Field
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api_nodes.util.validation_utils import (
validate_aspect_ratio_closeness,
validate_image_dimensions,
@ -161,63 +161,63 @@ async def execute_task(
)
class ViduTextToVideoNode(comfy_io.ComfyNode):
class ViduTextToVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ViduTextToVideoNode",
display_name="Vidu Text To Video Generation",
category="api node/video/Vidu",
description="Generate video from text prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=VideoModelName,
default=VideoModelName.vidu_q1,
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="A textual description for video generation",
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=5,
max=5,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed for video generation (0 for random)",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=AspectRatio,
default=AspectRatio.r_16_9,
tooltip="The aspect ratio of the output video",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=Resolution,
default=Resolution.r_1080p,
tooltip="Supported values may vary by model & duration",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"movement_amplitude",
options=MovementAmplitude,
default=MovementAmplitude.auto,
@ -226,12 +226,12 @@ class ViduTextToVideoNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -246,7 +246,7 @@ class ViduTextToVideoNode(comfy_io.ComfyNode):
aspect_ratio: str,
resolution: str,
movement_amplitude: str,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
if not prompt:
raise ValueError("The prompt field is required and cannot be empty.")
payload = TaskCreationRequest(
@ -263,65 +263,65 @@ class ViduTextToVideoNode(comfy_io.ComfyNode):
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
results = await execute_task(VIDU_TEXT_TO_VIDEO, auth, payload, 320, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
class ViduImageToVideoNode(comfy_io.ComfyNode):
class ViduImageToVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ViduImageToVideoNode",
display_name="Vidu Image To Video Generation",
category="api node/video/Vidu",
description="Generate video from image and optional prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=VideoModelName,
default=VideoModelName.vidu_q1,
tooltip="Model name",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="An image to be used as the start frame of the generated video",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="A textual description for video generation",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=5,
max=5,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed for video generation (0 for random)",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=Resolution,
default=Resolution.r_1080p,
tooltip="Supported values may vary by model & duration",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"movement_amplitude",
options=MovementAmplitude,
default=MovementAmplitude.auto.value,
@ -330,12 +330,12 @@ class ViduImageToVideoNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -350,7 +350,7 @@ class ViduImageToVideoNode(comfy_io.ComfyNode):
seed: int,
resolution: str,
movement_amplitude: str,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
if get_number_of_images(image) > 1:
raise ValueError("Only one input image is allowed.")
validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
@ -373,70 +373,70 @@ class ViduImageToVideoNode(comfy_io.ComfyNode):
auth_kwargs=auth,
)
results = await execute_task(VIDU_IMAGE_TO_VIDEO, auth, payload, 120, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
class ViduReferenceVideoNode(comfy_io.ComfyNode):
class ViduReferenceVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ViduReferenceVideoNode",
display_name="Vidu Reference To Video Generation",
category="api node/video/Vidu",
description="Generate video from multiple images and prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=VideoModelName,
default=VideoModelName.vidu_q1,
tooltip="Model name",
),
comfy_io.Image.Input(
IO.Image.Input(
"images",
tooltip="Images to use as references to generate a video with consistent subjects (max 7 images).",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="A textual description for video generation",
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=5,
max=5,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed for video generation (0 for random)",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=AspectRatio,
default=AspectRatio.r_16_9,
tooltip="The aspect ratio of the output video",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=[model.value for model in Resolution],
default=Resolution.r_1080p.value,
tooltip="Supported values may vary by model & duration",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"movement_amplitude",
options=[model.value for model in MovementAmplitude],
default=MovementAmplitude.auto.value,
@ -445,12 +445,12 @@ class ViduReferenceVideoNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -466,7 +466,7 @@ class ViduReferenceVideoNode(comfy_io.ComfyNode):
aspect_ratio: str,
resolution: str,
movement_amplitude: str,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
if not prompt:
raise ValueError("The prompt field is required and cannot be empty.")
a = get_number_of_images(images)
@ -495,68 +495,68 @@ class ViduReferenceVideoNode(comfy_io.ComfyNode):
auth_kwargs=auth,
)
results = await execute_task(VIDU_REFERENCE_VIDEO, auth, payload, 120, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
class ViduStartEndToVideoNode(comfy_io.ComfyNode):
class ViduStartEndToVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ViduStartEndToVideoNode",
display_name="Vidu Start End To Video Generation",
category="api node/video/Vidu",
description="Generate a video from start and end frames and a prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[model.value for model in VideoModelName],
default=VideoModelName.vidu_q1.value,
tooltip="Model name",
),
comfy_io.Image.Input(
IO.Image.Input(
"first_frame",
tooltip="Start frame",
),
comfy_io.Image.Input(
IO.Image.Input(
"end_frame",
tooltip="End frame",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="A textual description for video generation",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=5,
max=5,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed for video generation (0 for random)",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=[model.value for model in Resolution],
default=Resolution.r_1080p.value,
tooltip="Supported values may vary by model & duration",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"movement_amplitude",
options=[model.value for model in MovementAmplitude],
default=MovementAmplitude.auto.value,
@ -565,12 +565,12 @@ class ViduStartEndToVideoNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -586,7 +586,7 @@ class ViduStartEndToVideoNode(comfy_io.ComfyNode):
seed: int,
resolution: str,
movement_amplitude: str,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
payload = TaskCreationRequest(
model_name=model,
@ -605,12 +605,12 @@ class ViduStartEndToVideoNode(comfy_io.ComfyNode):
for frame in (first_frame, end_frame)
]
results = await execute_task(VIDU_START_END_VIDEO, auth, payload, 96, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
class ViduExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
ViduTextToVideoNode,
ViduImageToVideoNode,

View File

@ -4,7 +4,7 @@ from typing_extensions import override
import torch
from pydantic import BaseModel, Field
from comfy_api.latest import ComfyExtension, Input, io as comfy_io
from comfy_api.latest import ComfyExtension, Input, IO
from comfy_api_nodes.apis.client import (
ApiEndpoint,
HttpMethod,
@ -195,35 +195,35 @@ async def process_task(
).execute()
class WanTextToImageApi(comfy_io.ComfyNode):
class WanTextToImageApi(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="WanTextToImageApi",
display_name="Wan Text to Image",
category="api node/image/Wan",
description="Generates image based on text prompt.",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["wan2.5-t2i-preview"],
default="wan2.5-t2i-preview",
tooltip="Model to use.",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Negative text prompt to guide what to avoid.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"width",
default=1024,
min=768,
@ -231,7 +231,7 @@ class WanTextToImageApi(comfy_io.ComfyNode):
step=32,
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"height",
default=1024,
min=768,
@ -239,24 +239,24 @@ class WanTextToImageApi(comfy_io.ComfyNode):
step=32,
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_extend",
default=True,
tooltip="Whether to enhance the prompt with AI assistance.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the result.",
@ -264,12 +264,12 @@ class WanTextToImageApi(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -309,36 +309,36 @@ class WanTextToImageApi(comfy_io.ComfyNode):
estimated_duration=9,
poll_interval=3,
)
return comfy_io.NodeOutput(await download_url_to_image_tensor(str(response.output.results[0].url)))
return IO.NodeOutput(await download_url_to_image_tensor(str(response.output.results[0].url)))
class WanImageToImageApi(comfy_io.ComfyNode):
class WanImageToImageApi(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="WanImageToImageApi",
display_name="Wan Image to Image",
category="api node/image/Wan",
description="Generates an image from one or two input images and a text prompt. "
"The output image is currently fixed at 1.6 MP; its aspect ratio matches the input image(s).",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["wan2.5-i2i-preview"],
default="wan2.5-i2i-preview",
tooltip="Model to use.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="Single-image editing or multi-image fusion, maximum 2 images.",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
@ -346,7 +346,7 @@ class WanImageToImageApi(comfy_io.ComfyNode):
optional=True,
),
# redo this later as an optional combo of recommended resolutions
# comfy_io.Int.Input(
# IO.Int.Input(
# "width",
# default=1280,
# min=384,
@ -354,7 +354,7 @@ class WanImageToImageApi(comfy_io.ComfyNode):
# step=16,
# optional=True,
# ),
# comfy_io.Int.Input(
# IO.Int.Input(
# "height",
# default=1280,
# min=384,
@ -362,18 +362,18 @@ class WanImageToImageApi(comfy_io.ComfyNode):
# step=16,
# optional=True,
# ),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the result.",
@ -381,12 +381,12 @@ class WanImageToImageApi(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -431,38 +431,38 @@ class WanImageToImageApi(comfy_io.ComfyNode):
estimated_duration=42,
poll_interval=3,
)
return comfy_io.NodeOutput(await download_url_to_image_tensor(str(response.output.results[0].url)))
return IO.NodeOutput(await download_url_to_image_tensor(str(response.output.results[0].url)))
class WanTextToVideoApi(comfy_io.ComfyNode):
class WanTextToVideoApi(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="WanTextToVideoApi",
display_name="Wan Text to Video",
category="api node/video/Wan",
description="Generates video based on text prompt.",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["wan2.5-t2v-preview"],
default="wan2.5-t2v-preview",
tooltip="Model to use.",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Negative text prompt to guide what to avoid.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"size",
options=[
"480p: 1:1 (624x624)",
@ -482,45 +482,45 @@ class WanTextToVideoApi(comfy_io.ComfyNode):
default="480p: 1:1 (624x624)",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=5,
max=10,
step=5,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Available durations: 5 and 10 seconds",
optional=True,
),
comfy_io.Audio.Input(
IO.Audio.Input(
"audio",
optional=True,
tooltip="Audio must contain a clear, loud voice, without extraneous noise or background music.",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"generate_audio",
default=False,
optional=True,
tooltip="If there is no audio input, generate audio automatically.",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_extend",
default=True,
tooltip="Whether to enhance the prompt with AI assistance.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the result.",
@ -528,12 +528,12 @@ class WanTextToVideoApi(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -582,41 +582,41 @@ class WanTextToVideoApi(comfy_io.ComfyNode):
estimated_duration=120 * int(duration / 5),
poll_interval=6,
)
return comfy_io.NodeOutput(await download_url_to_video_output(response.output.video_url))
return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))
class WanImageToVideoApi(comfy_io.ComfyNode):
class WanImageToVideoApi(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="WanImageToVideoApi",
display_name="Wan Image to Video",
category="api node/video/Wan",
description="Generates video based on the first frame and text prompt.",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["wan2.5-i2v-preview"],
default="wan2.5-i2v-preview",
tooltip="Model to use.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Negative text prompt to guide what to avoid.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=[
"480P",
@ -626,45 +626,45 @@ class WanImageToVideoApi(comfy_io.ComfyNode):
default="480P",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=5,
max=10,
step=5,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Available durations: 5 and 10 seconds",
optional=True,
),
comfy_io.Audio.Input(
IO.Audio.Input(
"audio",
optional=True,
tooltip="Audio must contain a clear, loud voice, without extraneous noise or background music.",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"generate_audio",
default=False,
optional=True,
tooltip="If there is no audio input, generate audio automatically.",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_extend",
default=True,
tooltip="Whether to enhance the prompt with AI assistance.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the result.",
@ -672,12 +672,12 @@ class WanImageToVideoApi(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -731,12 +731,12 @@ class WanImageToVideoApi(comfy_io.ComfyNode):
estimated_duration=120 * int(duration / 5),
poll_interval=6,
)
return comfy_io.NodeOutput(await download_url_to_video_output(response.output.video_url))
return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))
class WanApiExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
WanTextToImageApi,
WanImageToImageApi,

View File

@ -265,6 +265,26 @@ class HierarchicalCache(BasicCache):
assert cache is not None
return await cache._ensure_subcache(node_id, children_ids)
class NullCache:
async def set_prompt(self, dynprompt, node_ids, is_changed_cache):
pass
def all_node_ids(self):
return []
def clean_unused(self):
pass
def get(self, node_id):
return None
def set(self, node_id, value):
pass
async def ensure_subcache_for(self, node_id, children_ids):
return self
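(Quick illustration, not part of the commit: the new NullCache treats every lookup as a miss, so nothing is ever served from the intermediate cache. Node IDs and values below are hypothetical.)

```python
cache = NullCache()
cache.set("node_1", ("some", "output"))  # discarded; NullCache retains nothing
assert cache.get("node_1") is None       # every lookup is a cache miss
assert cache.all_node_ids() == []        # no nodes are ever tracked
```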
class LRUCache(BasicCache):
def __init__(self, key_class, max_size=100):
super().__init__(key_class)
@ -316,157 +336,3 @@ class LRUCache(BasicCache):
self._mark_used(child_id)
self.children[cache_key].append(self.cache_key_set.get_data_key(child_id))
return self
class DependencyAwareCache(BasicCache):
"""
A cache implementation that tracks dependencies between nodes and manages
their execution and caching accordingly. It extends the BasicCache class.
Nodes are removed from this cache once all of their descendants have been
executed.
"""
def __init__(self, key_class):
"""
Initialize the DependencyAwareCache.
Args:
key_class: The class used for generating cache keys.
"""
super().__init__(key_class)
self.descendants = {} # Maps node_id -> set of descendant node_ids
self.ancestors = {} # Maps node_id -> set of ancestor node_ids
self.executed_nodes = set() # Tracks nodes that have been executed
async def set_prompt(self, dynprompt, node_ids, is_changed_cache):
"""
Clear the entire cache and rebuild the dependency graph.
Args:
dynprompt: The dynamic prompt object containing node information.
node_ids: List of node IDs to initialize the cache for.
is_changed_cache: Flag indicating if the cache has changed.
"""
# Clear all existing cache data
self.cache.clear()
self.subcaches.clear()
self.descendants.clear()
self.ancestors.clear()
self.executed_nodes.clear()
# Call the parent method to initialize the cache with the new prompt
await super().set_prompt(dynprompt, node_ids, is_changed_cache)
# Rebuild the dependency graph
self._build_dependency_graph(dynprompt, node_ids)
def _build_dependency_graph(self, dynprompt, node_ids):
"""
Build the dependency graph for all nodes.
Args:
dynprompt: The dynamic prompt object containing node information.
node_ids: List of node IDs to build the graph for.
"""
self.descendants.clear()
self.ancestors.clear()
for node_id in node_ids:
self.descendants[node_id] = set()
self.ancestors[node_id] = set()
for node_id in node_ids:
inputs = dynprompt.get_node(node_id)["inputs"]
for input_data in inputs.values():
if is_link(input_data): # Check if the input is a link to another node
ancestor_id = input_data[0]
self.descendants[ancestor_id].add(node_id)
self.ancestors[node_id].add(ancestor_id)
def set(self, node_id, value):
"""
Mark a node as executed and store its value in the cache.
Args:
node_id: The ID of the node to store.
value: The value to store for the node.
"""
self._set_immediate(node_id, value)
self.executed_nodes.add(node_id)
self._cleanup_ancestors(node_id)
def get(self, node_id):
"""
Retrieve the cached value for a node.
Args:
node_id: The ID of the node to retrieve.
Returns:
The cached value for the node.
"""
return self._get_immediate(node_id)
async def ensure_subcache_for(self, node_id, children_ids):
"""
Ensure a subcache exists for a node and update dependencies.
Args:
node_id: The ID of the parent node.
children_ids: List of child node IDs to associate with the parent node.
Returns:
The subcache object for the node.
"""
subcache = await super()._ensure_subcache(node_id, children_ids)
for child_id in children_ids:
self.descendants[node_id].add(child_id)
self.ancestors[child_id].add(node_id)
return subcache
def _cleanup_ancestors(self, node_id):
"""
Check if ancestors of a node can be removed from the cache.
Args:
node_id: The ID of the node whose ancestors are to be checked.
"""
for ancestor_id in self.ancestors.get(node_id, []):
if ancestor_id in self.executed_nodes:
# Remove ancestor if all its descendants have been executed
if all(descendant in self.executed_nodes for descendant in self.descendants[ancestor_id]):
self._remove_node(ancestor_id)
def _remove_node(self, node_id):
"""
Remove a node from the cache.
Args:
node_id: The ID of the node to remove.
"""
cache_key = self.cache_key_set.get_data_key(node_id)
if cache_key in self.cache:
del self.cache[cache_key]
subcache_key = self.cache_key_set.get_subcache_key(node_id)
if subcache_key in self.subcaches:
del self.subcaches[subcache_key]
def clean_unused(self):
"""
Clean up unused nodes. This is a no-op for this cache implementation.
"""
pass
def recursive_debug_dump(self):
"""
Dump the cache and dependency graph for debugging.
Returns:
A list containing the cache state and dependency graph.
"""
result = super().recursive_debug_dump()
result.append({
"descendants": self.descendants,
"ancestors": self.ancestors,
"executed_nodes": list(self.executed_nodes),
})
return result

View File

@ -153,8 +153,9 @@ class TopologicalSort:
continue
_, _, input_info = self.get_input_info(unique_id, input_name)
is_lazy = input_info is not None and "lazy" in input_info and input_info["lazy"]
if (include_lazy or not is_lazy) and not self.is_cached(from_node_id):
node_ids.append(from_node_id)
if (include_lazy or not is_lazy):
if not self.is_cached(from_node_id):
node_ids.append(from_node_id)
links.append((from_node_id, from_socket, unique_id))
for link in links:
@ -194,10 +195,34 @@ class ExecutionList(TopologicalSort):
super().__init__(dynprompt)
self.output_cache = output_cache
self.staged_node_id = None
self.execution_cache = {}
self.execution_cache_listeners = {}
def is_cached(self, node_id):
return self.output_cache.get(node_id) is not None
def cache_link(self, from_node_id, to_node_id):
if not to_node_id in self.execution_cache:
self.execution_cache[to_node_id] = {}
self.execution_cache[to_node_id][from_node_id] = self.output_cache.get(from_node_id)
if not from_node_id in self.execution_cache_listeners:
self.execution_cache_listeners[from_node_id] = set()
self.execution_cache_listeners[from_node_id].add(to_node_id)
def get_output_cache(self, from_node_id, to_node_id):
if not to_node_id in self.execution_cache:
return None
return self.execution_cache[to_node_id].get(from_node_id)
def cache_update(self, node_id, value):
if node_id in self.execution_cache_listeners:
for to_node_id in self.execution_cache_listeners[node_id]:
self.execution_cache[to_node_id][node_id] = value
def add_strong_link(self, from_node_id, from_socket, to_node_id):
super().add_strong_link(from_node_id, from_socket, to_node_id)
self.cache_link(from_node_id, to_node_id)
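(Minimal sketch of the bookkeeping that cache_link and cache_update maintain, rebuilt here with plain dicts and hypothetical node IDs; an illustration only, not part of the commit.)

```python
execution_cache = {}             # consumer node id -> {producer node id: cached output}
execution_cache_listeners = {}   # producer node id -> set of consumer node ids

def cache_link(from_node_id, to_node_id, value):
    execution_cache.setdefault(to_node_id, {})[from_node_id] = value
    execution_cache_listeners.setdefault(from_node_id, set()).add(to_node_id)

def cache_update(node_id, value):
    # push a refreshed output to every consumer that linked against node_id
    for to_node_id in execution_cache_listeners.get(node_id, ()):
        execution_cache[to_node_id][node_id] = value

cache_link("3", "7", "old output of 3")
cache_update("3", "new output of 3")
assert execution_cache["7"]["3"] == "new output of 3"
```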
async def stage_node_execution(self):
assert self.staged_node_id is None
if self.is_empty():
@ -277,6 +302,8 @@ class ExecutionList(TopologicalSort):
def complete_node_execution(self):
node_id = self.staged_node_id
self.pop_node(node_id)
self.execution_cache.pop(node_id, None)
self.execution_cache_listeners.pop(node_id, None)
self.staged_node_id = None
def get_nodes_in_cycle(self):

View File

@ -142,9 +142,10 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
for key, value in metadata.items():
output_container.metadata[key] = value
layout = 'mono' if waveform.shape[0] == 1 else 'stereo'
# Set up the output stream with appropriate properties
if format == "opus":
out_stream = output_container.add_stream("libopus", rate=sample_rate)
out_stream = output_container.add_stream("libopus", rate=sample_rate, layout=layout)
if quality == "64k":
out_stream.bit_rate = 64000
elif quality == "96k":
@ -156,7 +157,7 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
elif quality == "320k":
out_stream.bit_rate = 320000
elif format == "mp3":
out_stream = output_container.add_stream("libmp3lame", rate=sample_rate)
out_stream = output_container.add_stream("libmp3lame", rate=sample_rate, layout=layout)
if quality == "V0":
#TODO i would really love to support V3 and V5 but there doesn't seem to be a way to set the qscale level, the property below is a bool
out_stream.codec_context.qscale = 1
@ -165,9 +166,9 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
elif quality == "320k":
out_stream.bit_rate = 320000
else: #format == "flac":
out_stream = output_container.add_stream("flac", rate=sample_rate)
out_stream = output_container.add_stream("flac", rate=sample_rate, layout=layout)
frame = av.AudioFrame.from_ndarray(waveform.movedim(0, 1).reshape(1, -1).float().numpy(), format='flt', layout='mono' if waveform.shape[0] == 1 else 'stereo')
frame = av.AudioFrame.from_ndarray(waveform.movedim(0, 1).reshape(1, -1).float().numpy(), format='flt', layout=layout)
frame.sample_rate = sample_rate
frame.pts = 0
output_container.mux(out_stream.encode(frame))

View File

@ -1,20 +1,26 @@
from comfy.cldm.control_types import UNION_CONTROLNET_TYPES
import nodes
import comfy.utils
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
class SetUnionControlNetType:
class SetUnionControlNetType(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"control_net": ("CONTROL_NET", ),
"type": (["auto"] + list(UNION_CONTROLNET_TYPES.keys()),)
}}
def define_schema(cls):
return io.Schema(
node_id="SetUnionControlNetType",
category="conditioning/controlnet",
inputs=[
io.ControlNet.Input("control_net"),
io.Combo.Input("type", options=["auto"] + list(UNION_CONTROLNET_TYPES.keys())),
],
outputs=[
io.ControlNet.Output(),
],
)
CATEGORY = "conditioning/controlnet"
RETURN_TYPES = ("CONTROL_NET",)
FUNCTION = "set_controlnet_type"
def set_controlnet_type(self, control_net, type):
@classmethod
def execute(cls, control_net, type) -> io.NodeOutput:
control_net = control_net.copy()
type_number = UNION_CONTROLNET_TYPES.get(type, -1)
if type_number >= 0:
@ -22,27 +28,36 @@ class SetUnionControlNetType:
else:
control_net.set_extra_arg("control_type", [])
return (control_net,)
return io.NodeOutput(control_net)
class ControlNetInpaintingAliMamaApply(nodes.ControlNetApplyAdvanced):
set_controlnet_type = execute # TODO: remove
class ControlNetInpaintingAliMamaApply(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
"control_net": ("CONTROL_NET", ),
"vae": ("VAE", ),
"image": ("IMAGE", ),
"mask": ("MASK", ),
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
"start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
"end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001})
}}
def define_schema(cls):
return io.Schema(
node_id="ControlNetInpaintingAliMamaApply",
category="conditioning/controlnet",
inputs=[
io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"),
io.ControlNet.Input("control_net"),
io.Vae.Input("vae"),
io.Image.Input("image"),
io.Mask.Input("mask"),
io.Float.Input("strength", default=1.0, min=0.0, max=10.0, step=0.01),
io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001),
io.Float.Input("end_percent", default=1.0, min=0.0, max=1.0, step=0.001),
],
outputs=[
io.Conditioning.Output(display_name="positive"),
io.Conditioning.Output(display_name="negative"),
],
)
FUNCTION = "apply_inpaint_controlnet"
CATEGORY = "conditioning/controlnet"
def apply_inpaint_controlnet(self, positive, negative, control_net, vae, image, mask, strength, start_percent, end_percent):
@classmethod
def execute(cls, positive, negative, control_net, vae, image, mask, strength, start_percent, end_percent) -> io.NodeOutput:
extra_concat = []
if control_net.concat_mask:
mask = 1.0 - mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1]))
@ -50,11 +65,20 @@ class ControlNetInpaintingAliMamaApply(nodes.ControlNetApplyAdvanced):
image = image * mask_apply.movedim(1, -1).repeat(1, 1, 1, image.shape[3])
extra_concat = [mask]
return self.apply_controlnet(positive, negative, control_net, image, strength, start_percent, end_percent, vae=vae, extra_concat=extra_concat)
result = nodes.ControlNetApplyAdvanced().apply_controlnet(positive, negative, control_net, image, strength, start_percent, end_percent, vae=vae, extra_concat=extra_concat)
return io.NodeOutput(result[0], result[1])
apply_inpaint_controlnet = execute # TODO: remove
class ControlNetExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
SetUnionControlNetType,
ControlNetInpaintingAliMamaApply,
]
NODE_CLASS_MAPPINGS = {
"SetUnionControlNetType": SetUnionControlNetType,
"ControlNetInpaintingAliMamaApply": ControlNetInpaintingAliMamaApply,
}
async def comfy_entrypoint() -> ControlNetExtension:
return ControlNetExtension()

View File

@ -244,6 +244,8 @@ class EasyCacheHolder:
self.total_steps_skipped += 1
batch_offset = x.shape[0] // len(uuids)
for i, uuid in enumerate(uuids):
# slice out only what is relevant to this cond
batch_slice = [slice(i*batch_offset,(i+1)*batch_offset)]
# if cached dims don't match x dims, cut off excess and hope for the best (cosmos world2video)
if x.shape[1:] != self.uuid_cache_diffs[uuid].shape[1:]:
if not self.allow_mismatch:
@ -261,9 +263,8 @@ class EasyCacheHolder:
slicing.append(slice(None, dim_u))
else:
slicing.append(slice(None))
slicing = [slice(i*batch_offset,(i+1)*batch_offset)] + slicing
x = x[slicing]
x += self.uuid_cache_diffs[uuid].to(x.device)
batch_slice = batch_slice + slicing
x[batch_slice] += self.uuid_cache_diffs[uuid].to(x.device)
return x
def update_cache_diff(self, output: torch.Tensor, x: torch.Tensor, uuids: list[UUID]):

View File

@ -1,5 +1,7 @@
import torch
from typing_extensions import override
from comfy.k_diffusion.sampling import sigma_to_half_log_snr
from comfy_api.latest import ComfyExtension, io
@ -63,12 +65,105 @@ class EpsilonScaling(io.ComfyNode):
return io.NodeOutput(model_clone)
def compute_tsr_rescaling_factor(
snr: torch.Tensor, tsr_k: float, tsr_variance: float
) -> torch.Tensor:
"""Compute the rescaling score ratio in Temporal Score Rescaling.
See equation (6) in https://arxiv.org/pdf/2510.01184v1.
"""
posinf_mask = torch.isposinf(snr)
rescaling_factor = (snr * tsr_variance + 1) / (snr * tsr_variance / tsr_k + 1)
return torch.where(posinf_mask, tsr_k, rescaling_factor) # when snr → inf, r = tsr_k
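(Quick sanity check of the two limits, assuming compute_tsr_rescaling_factor from the hunk above is in scope; the numbers are illustrative, not part of the commit.)

```python
import torch

# assumes compute_tsr_rescaling_factor (defined above) is importable/in scope
k, variance = 0.95, 1.0
r_low = compute_tsr_rescaling_factor(torch.tensor(0.0), k, variance)            # SNR -> 0
r_high = compute_tsr_rescaling_factor(torch.tensor(float("inf")), k, variance)  # SNR -> inf
print(r_low)   # tensor(1.)     no rescaling at zero SNR
print(r_high)  # tensor(0.9500) approaches tsr_k via the posinf mask
```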
class TemporalScoreRescaling(io.ComfyNode):
@classmethod
def define_schema(cls):
return io.Schema(
node_id="TemporalScoreRescaling",
display_name="TSR - Temporal Score Rescaling",
category="model_patches/unet",
inputs=[
io.Model.Input("model"),
io.Float.Input(
"tsr_k",
tooltip=(
"Controls the rescaling strength.\n"
"Lower k produces more detailed results; higher k produces smoother results in image generation. Setting k = 1 disables rescaling."
),
default=0.95,
min=0.01,
max=100.0,
step=0.001,
display_mode=io.NumberDisplay.number,
),
io.Float.Input(
"tsr_sigma",
tooltip=(
"Controls how early rescaling takes effect.\n"
"Larger values take effect earlier."
),
default=1.0,
min=0.01,
max=100.0,
step=0.001,
display_mode=io.NumberDisplay.number,
),
],
outputs=[
io.Model.Output(
display_name="patched_model",
),
],
description=(
"[Post-CFG Function]\n"
"TSR - Temporal Score Rescaling (2510.01184)\n\n"
"Rescales the model's score or noise to steer sampling diversity.\n"
),
)
@classmethod
def execute(cls, model, tsr_k, tsr_sigma) -> io.NodeOutput:
tsr_variance = tsr_sigma**2
def temporal_score_rescaling(args):
denoised = args["denoised"]
x = args["input"]
sigma = args["sigma"]
curr_model = args["model"]
# No rescaling (r = 1) or no noise
if tsr_k == 1 or sigma == 0:
return denoised
model_sampling = curr_model.current_patcher.get_model_object("model_sampling")
half_log_snr = sigma_to_half_log_snr(sigma, model_sampling)
snr = (2 * half_log_snr).exp()
# No rescaling needed (r = 1)
if snr == 0:
return denoised
rescaling_r = compute_tsr_rescaling_factor(snr, tsr_k, tsr_variance)
# Derived from scaled_denoised = (x - r * sigma * noise) / alpha
alpha = sigma * half_log_snr.exp()
return torch.lerp(x / alpha, denoised, rescaling_r)
m = model.clone()
m.set_model_sampler_post_cfg_function(temporal_score_rescaling)
return io.NodeOutput(m)
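(My reading of the "Derived from scaled_denoised = (x - r * sigma * noise) / alpha" comment above, spelling out why the last line is a lerp; it assumes the usual parameterization x = alpha * denoised + sigma * noise.)

```latex
\hat{x}_{\text{scaled}}
  = \frac{x - r\,\sigma\,\epsilon}{\alpha}
  = \frac{x - r\,\bigl(x - \alpha\,\hat{x}_0\bigr)}{\alpha}
  = (1 - r)\,\frac{x}{\alpha} + r\,\hat{x}_0
  = \operatorname{lerp}\!\Bigl(\frac{x}{\alpha},\ \hat{x}_0,\ r\Bigr)
```

with \hat{x}_0 standing for `denoised` and r for `rescaling_r`, which matches the `torch.lerp(x / alpha, denoised, rescaling_r)` call above.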
class EpsilonScalingExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
EpsilonScaling,
TemporalScoreRescaling,
]
async def comfy_entrypoint() -> EpsilonScalingExtension:
return EpsilonScalingExtension()

View File

@ -2,42 +2,60 @@ import nodes
import node_helpers
import torch
import comfy.model_management
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
class CLIPTextEncodeHunyuanDiT:
class CLIPTextEncodeHunyuanDiT(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {
"clip": ("CLIP", ),
"bert": ("STRING", {"multiline": True, "dynamicPrompts": True}),
"mt5xl": ("STRING", {"multiline": True, "dynamicPrompts": True}),
}}
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "encode"
def define_schema(cls):
return io.Schema(
node_id="CLIPTextEncodeHunyuanDiT",
category="advanced/conditioning",
inputs=[
io.Clip.Input("clip"),
io.String.Input("bert", multiline=True, dynamic_prompts=True),
io.String.Input("mt5xl", multiline=True, dynamic_prompts=True),
],
outputs=[
io.Conditioning.Output(),
],
)
CATEGORY = "advanced/conditioning"
def encode(self, clip, bert, mt5xl):
@classmethod
def execute(cls, clip, bert, mt5xl) -> io.NodeOutput:
tokens = clip.tokenize(bert)
tokens["mt5xl"] = clip.tokenize(mt5xl)["mt5xl"]
return (clip.encode_from_tokens_scheduled(tokens), )
return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens))
class EmptyHunyuanLatentVideo:
encode = execute # TODO: remove
class EmptyHunyuanLatentVideo(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": { "width": ("INT", {"default": 848, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
"height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
"length": ("INT", {"default": 25, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
RETURN_TYPES = ("LATENT",)
FUNCTION = "generate"
def define_schema(cls):
return io.Schema(
node_id="EmptyHunyuanLatentVideo",
category="latent/video",
inputs=[
io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16),
io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16),
io.Int.Input("length", default=25, min=1, max=nodes.MAX_RESOLUTION, step=4),
io.Int.Input("batch_size", default=1, min=1, max=4096),
],
outputs=[
io.Latent.Output(),
],
)
CATEGORY = "latent/video"
def generate(self, width, height, length, batch_size=1):
@classmethod
def execute(cls, width, height, length, batch_size=1) -> io.NodeOutput:
latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
return ({"samples":latent}, )
return io.NodeOutput({"samples":latent})
generate = execute # TODO: remove
PROMPT_TEMPLATE_ENCODE_VIDEO_I2V = (
"<|start_header_id|>system<|end_header_id|>\n\n<image>\nDescribe the video by detailing the following aspects according to the reference image: "
@ -50,45 +68,61 @@ PROMPT_TEMPLATE_ENCODE_VIDEO_I2V = (
"<|start_header_id|>assistant<|end_header_id|>\n\n"
)
class TextEncodeHunyuanVideo_ImageToVideo:
class TextEncodeHunyuanVideo_ImageToVideo(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {
"clip": ("CLIP", ),
"clip_vision_output": ("CLIP_VISION_OUTPUT", ),
"prompt": ("STRING", {"multiline": True, "dynamicPrompts": True}),
"image_interleave": ("INT", {"default": 2, "min": 1, "max": 512, "tooltip": "How much the image influences things vs the text prompt. Higher number means more influence from the text prompt."}),
}}
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "encode"
def define_schema(cls):
return io.Schema(
node_id="TextEncodeHunyuanVideo_ImageToVideo",
category="advanced/conditioning",
inputs=[
io.Clip.Input("clip"),
io.ClipVisionOutput.Input("clip_vision_output"),
io.String.Input("prompt", multiline=True, dynamic_prompts=True),
io.Int.Input(
"image_interleave",
default=2,
min=1,
max=512,
tooltip="How much the image influences things vs the text prompt. Higher number means more influence from the text prompt.",
),
],
outputs=[
io.Conditioning.Output(),
],
)
CATEGORY = "advanced/conditioning"
def encode(self, clip, clip_vision_output, prompt, image_interleave):
@classmethod
def execute(cls, clip, clip_vision_output, prompt, image_interleave) -> io.NodeOutput:
tokens = clip.tokenize(prompt, llama_template=PROMPT_TEMPLATE_ENCODE_VIDEO_I2V, image_embeds=clip_vision_output.mm_projected, image_interleave=image_interleave)
return (clip.encode_from_tokens_scheduled(tokens), )
return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens))
class HunyuanImageToVideo:
encode = execute # TODO: remove
class HunyuanImageToVideo(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING", ),
"vae": ("VAE", ),
"width": ("INT", {"default": 848, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
"height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
"length": ("INT", {"default": 53, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
"guidance_type": (["v1 (concat)", "v2 (replace)", "custom"], )
},
"optional": {"start_image": ("IMAGE", ),
}}
def define_schema(cls):
return io.Schema(
node_id="HunyuanImageToVideo",
category="conditioning/video_models",
inputs=[
io.Conditioning.Input("positive"),
io.Vae.Input("vae"),
io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16),
io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16),
io.Int.Input("length", default=53, min=1, max=nodes.MAX_RESOLUTION, step=4),
io.Int.Input("batch_size", default=1, min=1, max=4096),
io.Combo.Input("guidance_type", options=["v1 (concat)", "v2 (replace)", "custom"]),
io.Image.Input("start_image", optional=True),
],
outputs=[
io.Conditioning.Output(display_name="positive"),
io.Latent.Output(display_name="latent"),
],
)
RETURN_TYPES = ("CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "latent")
FUNCTION = "encode"
CATEGORY = "conditioning/video_models"
def encode(self, positive, vae, width, height, length, batch_size, guidance_type, start_image=None):
@classmethod
def execute(cls, positive, vae, width, height, length, batch_size, guidance_type, start_image=None) -> io.NodeOutput:
latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
out_latent = {}
@ -111,51 +145,76 @@ class HunyuanImageToVideo:
positive = node_helpers.conditioning_set_values(positive, cond)
out_latent["samples"] = latent
return (positive, out_latent)
return io.NodeOutput(positive, out_latent)
class EmptyHunyuanImageLatent:
encode = execute # TODO: remove
class EmptyHunyuanImageLatent(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": { "width": ("INT", {"default": 2048, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
"height": ("INT", {"default": 2048, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
RETURN_TYPES = ("LATENT",)
FUNCTION = "generate"
def define_schema(cls):
return io.Schema(
node_id="EmptyHunyuanImageLatent",
category="latent",
inputs=[
io.Int.Input("width", default=2048, min=64, max=nodes.MAX_RESOLUTION, step=32),
io.Int.Input("height", default=2048, min=64, max=nodes.MAX_RESOLUTION, step=32),
io.Int.Input("batch_size", default=1, min=1, max=4096),
],
outputs=[
io.Latent.Output(),
],
)
CATEGORY = "latent"
def generate(self, width, height, batch_size=1):
@classmethod
def execute(cls, width, height, batch_size=1) -> io.NodeOutput:
latent = torch.zeros([batch_size, 64, height // 32, width // 32], device=comfy.model_management.intermediate_device())
return ({"samples":latent}, )
return io.NodeOutput({"samples":latent})
class HunyuanRefinerLatent:
generate = execute # TODO: remove
class HunyuanRefinerLatent(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
"latent": ("LATENT", ),
"noise_augmentation": ("FLOAT", {"default": 0.10, "min": 0.0, "max": 1.0, "step": 0.01}),
}}
def define_schema(cls):
return io.Schema(
node_id="HunyuanRefinerLatent",
inputs=[
io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"),
io.Latent.Input("latent"),
io.Float.Input("noise_augmentation", default=0.10, min=0.0, max=1.0, step=0.01),
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "negative", "latent")
],
outputs=[
io.Conditioning.Output(display_name="positive"),
io.Conditioning.Output(display_name="negative"),
io.Latent.Output(display_name="latent"),
],
)
FUNCTION = "execute"
def execute(self, positive, negative, latent, noise_augmentation):
@classmethod
def execute(cls, positive, negative, latent, noise_augmentation) -> io.NodeOutput:
latent = latent["samples"]
positive = node_helpers.conditioning_set_values(positive, {"concat_latent_image": latent, "noise_augmentation": noise_augmentation})
negative = node_helpers.conditioning_set_values(negative, {"concat_latent_image": latent, "noise_augmentation": noise_augmentation})
out_latent = {}
out_latent["samples"] = torch.zeros([latent.shape[0], 32, latent.shape[-3], latent.shape[-2], latent.shape[-1]], device=comfy.model_management.intermediate_device())
return (positive, negative, out_latent)
return io.NodeOutput(positive, negative, out_latent)
NODE_CLASS_MAPPINGS = {
"CLIPTextEncodeHunyuanDiT": CLIPTextEncodeHunyuanDiT,
"TextEncodeHunyuanVideo_ImageToVideo": TextEncodeHunyuanVideo_ImageToVideo,
"EmptyHunyuanLatentVideo": EmptyHunyuanLatentVideo,
"HunyuanImageToVideo": HunyuanImageToVideo,
"EmptyHunyuanImageLatent": EmptyHunyuanImageLatent,
"HunyuanRefinerLatent": HunyuanRefinerLatent,
}
class HunyuanExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
CLIPTextEncodeHunyuanDiT,
TextEncodeHunyuanVideo_ImageToVideo,
EmptyHunyuanLatentVideo,
HunyuanImageToVideo,
EmptyHunyuanImageLatent,
HunyuanRefinerLatent,
]
async def comfy_entrypoint() -> HunyuanExtension:
return HunyuanExtension()

View File

@ -25,7 +25,7 @@ class PreviewAny():
value = str(source)
elif source is not None:
try:
value = json.dumps(source)
value = json.dumps(source, indent=4)
except Exception:
try:
value = str(source)

View File

@ -1,3 +1,3 @@
# This file is automatically generated by the build process when version is
# updated in pyproject.toml.
__version__ = "0.3.64"
__version__ = "0.3.65"

View File

@ -1,6 +1,6 @@
import os
import importlib.util
from comfy.cli_args import args
from comfy.cli_args import args, PerformanceFeature
import subprocess
#Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
@ -75,8 +75,9 @@ if not args.cuda_malloc:
spec.loader.exec_module(module)
version = module.__version__
if int(version[0]) >= 2 and "+cu" in version: #enable by default for torch version 2.0 and up only on cuda torch
args.cuda_malloc = cuda_malloc_supported()
if int(version[0]) >= 2 and "+cu" in version: # enable by default for torch version 2.0 and up only on cuda torch
if PerformanceFeature.AutoTune not in args.fast: # Autotune has issues with cuda malloc
args.cuda_malloc = cuda_malloc_supported()
except:
pass

View File

@ -18,7 +18,7 @@ from comfy_execution.caching import (
BasicCache,
CacheKeySetID,
CacheKeySetInputSignature,
DependencyAwareCache,
NullCache,
HierarchicalCache,
LRUCache,
)
@ -91,13 +91,13 @@ class IsChangedCache:
class CacheType(Enum):
CLASSIC = 0
LRU = 1
DEPENDENCY_AWARE = 2
NONE = 2
class CacheSet:
def __init__(self, cache_type=None, cache_size=None):
if cache_type == CacheType.DEPENDENCY_AWARE:
self.init_dependency_aware_cache()
if cache_type == CacheType.NONE:
self.init_null_cache()
logging.info("Disabling intermediate node cache.")
elif cache_type == CacheType.LRU:
if cache_size is None:
@ -120,11 +120,12 @@ class CacheSet:
self.ui = LRUCache(CacheKeySetInputSignature, max_size=cache_size)
self.objects = HierarchicalCache(CacheKeySetID)
# only hold cached items while the descendants have not executed
def init_dependency_aware_cache(self):
self.outputs = DependencyAwareCache(CacheKeySetInputSignature)
self.ui = DependencyAwareCache(CacheKeySetInputSignature)
self.objects = DependencyAwareCache(CacheKeySetID)
def init_null_cache(self):
self.outputs = NullCache()
#The UI cache is expected to be iterable at the end of each workflow
#so it must cache at least a full workflow. Use Hierarchical
self.ui = HierarchicalCache(CacheKeySetInputSignature)
self.objects = NullCache()
def recursive_debug_dump(self):
result = {
@ -135,7 +136,7 @@ class CacheSet:
SENSITIVE_EXTRA_DATA_KEYS = ("auth_token_comfy_org", "api_key_comfy_org")
def get_input_data(inputs, class_def, unique_id, outputs=None, dynprompt=None, extra_data={}):
def get_input_data(inputs, class_def, unique_id, execution_list=None, dynprompt=None, extra_data={}):
is_v3 = issubclass(class_def, _ComfyNodeInternal)
if is_v3:
valid_inputs, schema = class_def.INPUT_TYPES(include_hidden=False, return_schema=True)
@ -153,10 +154,10 @@ def get_input_data(inputs, class_def, unique_id, outputs=None, dynprompt=None, e
if is_link(input_data) and (not input_info or not input_info.get("rawLink", False)):
input_unique_id = input_data[0]
output_index = input_data[1]
if outputs is None:
if execution_list is None:
mark_missing()
continue # This might be a lazily-evaluated input
cached_output = outputs.get(input_unique_id)
cached_output = execution_list.get_output_cache(input_unique_id, unique_id)
if cached_output is None:
mark_missing()
continue
@ -405,6 +406,7 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
cached_output = caches.ui.get(unique_id) or {}
server.send_sync("executed", { "node": unique_id, "display_node": display_node_id, "output": cached_output.get("output",None), "prompt_id": prompt_id }, server.client_id)
get_progress_state().finish_progress(unique_id)
execution_list.cache_update(unique_id, caches.outputs.get(unique_id))
return (ExecutionResult.SUCCESS, None, None)
input_data_all = None
@ -434,7 +436,7 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
for r in result:
if is_link(r):
source_node, source_output = r[0], r[1]
node_output = caches.outputs.get(source_node)[source_output]
node_output = execution_list.get_output_cache(source_node, unique_id)[source_output]
for o in node_output:
resolved_output.append(o)
@ -446,7 +448,7 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
has_subgraph = False
else:
get_progress_state().start_progress(unique_id)
input_data_all, missing_keys, hidden_inputs = get_input_data(inputs, class_def, unique_id, caches.outputs, dynprompt, extra_data)
input_data_all, missing_keys, hidden_inputs = get_input_data(inputs, class_def, unique_id, execution_list, dynprompt, extra_data)
if server.client_id is not None:
server.last_node_id = display_node_id
server.send_sync("executing", { "node": unique_id, "display_node": display_node_id, "prompt_id": prompt_id }, server.client_id)
@ -549,11 +551,15 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
subcache.clean_unused()
for node_id in new_output_ids:
execution_list.add_node(node_id)
execution_list.cache_link(node_id, unique_id)
for link in new_output_links:
execution_list.add_strong_link(link[0], link[1], unique_id)
pending_subgraph_results[unique_id] = cached_outputs
return (ExecutionResult.PENDING, None, None)
caches.outputs.set(unique_id, output_data)
execution_list.cache_update(unique_id, output_data)
except comfy.model_management.InterruptProcessingException as iex:
logging.info("Processing interrupted")

View File

@ -1,25 +1,5 @@
#Rename this to extra_model_paths.yaml and ComfyUI will load it
#config for a1111 ui
#all you have to do is change the base_path to where yours is installed
a111:
base_path: path/to/stable-diffusion-webui/
checkpoints: models/Stable-diffusion
configs: models/Stable-diffusion
vae: models/VAE
loras: |
models/Lora
models/LyCORIS
upscale_models: |
models/ESRGAN
models/RealESRGAN
models/SwinIR
embeddings: embeddings
hypernetworks: models/hypernetworks
controlnet: models/ControlNet
#config for comfyui
#your base path should be either an existing comfy install or a central folder where you store all of your models, loras, etc.
@ -28,7 +8,9 @@ a111:
# # You can use is_default to mark that these folders should be listed first, and used as the default dirs for eg downloads
# #is_default: true
# checkpoints: models/checkpoints/
# clip: models/clip/
# text_encoders: |
# models/text_encoders/
# models/clip/ # legacy location still supported
# clip_vision: models/clip_vision/
# configs: models/configs/
# controlnet: models/controlnet/
@ -39,6 +21,32 @@ a111:
# loras: models/loras/
# upscale_models: models/upscale_models/
# vae: models/vae/
# audio_encoders: models/audio_encoders/
# model_patches: models/model_patches/
#config for a1111 ui
#all you have to do is uncomment this (remove the #) and change the base_path to where yours is installed
#a111:
# base_path: path/to/stable-diffusion-webui/
# checkpoints: models/Stable-diffusion
# configs: models/Stable-diffusion
# vae: models/VAE
# loras: |
# models/Lora
# models/LyCORIS
# upscale_models: |
# models/ESRGAN
# models/RealESRGAN
# models/SwinIR
# embeddings: embeddings
# hypernetworks: models/hypernetworks
# controlnet: models/ControlNet
# For a full list of supported keys (style_models, vae_approx, hypernetworks, photomaker,
# model_patches, audio_encoders, classifiers, etc.) see folder_paths.py.
#other_ui:
# base_path: path/to/ui

View File

@ -173,7 +173,7 @@ def prompt_worker(q, server_instance):
if args.cache_lru > 0:
cache_type = execution.CacheType.LRU
elif args.cache_none:
cache_type = execution.CacheType.DEPENDENCY_AWARE
cache_type = execution.CacheType.NONE
e = execution.PromptExecutor(server_instance, cache_type=cache_type, cache_size=args.cache_lru)
last_gc_collect = 0

View File

@ -1,6 +1,6 @@
[project]
name = "ComfyUI"
version = "0.3.64"
version = "0.3.65"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.9"

View File

@ -1,6 +1,6 @@
comfyui-frontend-package==1.27.10
comfyui-workflow-templates==0.1.94
comfyui-embedded-docs==0.2.6
comfyui-frontend-package==1.28.7
comfyui-workflow-templates==0.1.95
comfyui-embedded-docs==0.3.0
torch
torchsde
torchvision

View File

@ -48,6 +48,28 @@ async def send_socket_catch_exception(function, message):
except (aiohttp.ClientError, aiohttp.ClientPayloadError, ConnectionResetError, BrokenPipeError, ConnectionError) as err:
logging.warning("send error: {}".format(err))
# Track deprecated paths that have been warned about to only warn once per file
_deprecated_paths_warned = set()
@web.middleware
async def deprecation_warning(request: web.Request, handler):
"""Middleware to warn about deprecated frontend API paths"""
path = request.path
if (path.startswith('/scripts/') or path.startswith('/extensions/core/')):
# Only warn once per unique file path
if path not in _deprecated_paths_warned:
_deprecated_paths_warned.add(path)
logging.warning(
f"[DEPRECATION WARNING] Detected import of deprecated legacy API: {path}. "
f"This is likely caused by a custom node extension using outdated APIs. "
f"Please update your extensions or contact the extension author for an updated version."
)
response: web.Response = await handler(request)
return response
@web.middleware
async def compress_body(request: web.Request, handler):
accept_encoding = request.headers.get("Accept-Encoding", "")
@ -159,7 +181,7 @@ class PromptServer():
self.client_session:Optional[aiohttp.ClientSession] = None
self.number = 0
middlewares = [cache_control]
middlewares = [cache_control, deprecation_warning]
if args.enable_compress_response_body:
middlewares.append(compress_body)

View File

@ -152,12 +152,12 @@ class TestExecution:
# Initialize server and client
#
@fixture(scope="class", autouse=True, params=[
# (use_lru, lru_size)
(False, 0),
(True, 0),
(True, 100),
{ "extra_args" : [], "should_cache_results" : True },
{ "extra_args" : ["--cache-lru", 0], "should_cache_results" : True },
{ "extra_args" : ["--cache-lru", 100], "should_cache_results" : True },
{ "extra_args" : ["--cache-none"], "should_cache_results" : False },
])
def _server(self, args_pytest, request):
def server(self, args_pytest, request):
# Start server
pargs = [
'python','main.py',
@ -167,12 +167,10 @@ class TestExecution:
'--extra-model-paths-config', 'tests/execution/extra_model_paths.yaml',
'--cpu',
]
use_lru, lru_size = request.param
if use_lru:
pargs += ['--cache-lru', str(lru_size)]
pargs += [ str(param) for param in request.param["extra_args"] ]
print("Running server with args:", pargs) # noqa: T201
p = subprocess.Popen(pargs)
yield
yield request.param
p.kill()
torch.cuda.empty_cache()
@ -193,7 +191,7 @@ class TestExecution:
return comfy_client
@fixture(scope="class", autouse=True)
def shared_client(self, args_pytest, _server):
def shared_client(self, args_pytest, server):
client = self.start_client(args_pytest["listen"], args_pytest["port"])
yield client
del client
@ -225,7 +223,7 @@ class TestExecution:
assert result.did_run(mask)
assert result.did_run(lazy_mix)
def test_full_cache(self, client: ComfyClient, builder: GraphBuilder):
def test_full_cache(self, client: ComfyClient, builder: GraphBuilder, server):
g = builder
input1 = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
input2 = g.node("StubImage", content="NOISE", height=512, width=512, batch_size=1)
@ -237,9 +235,12 @@ class TestExecution:
client.run(g)
result2 = client.run(g)
for node_id, node in g.nodes.items():
assert not result2.did_run(node), f"Node {node_id} ran, but should have been cached"
if server["should_cache_results"]:
assert not result2.did_run(node), f"Node {node_id} ran, but should have been cached"
else:
assert result2.did_run(node), f"Node {node_id} was cached, but should have been run"
def test_partial_cache(self, client: ComfyClient, builder: GraphBuilder):
def test_partial_cache(self, client: ComfyClient, builder: GraphBuilder, server):
g = builder
input1 = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
input2 = g.node("StubImage", content="NOISE", height=512, width=512, batch_size=1)
@ -251,8 +252,12 @@ class TestExecution:
client.run(g)
mask.inputs['value'] = 0.4
result2 = client.run(g)
assert not result2.did_run(input1), "Input1 should have been cached"
assert not result2.did_run(input2), "Input2 should have been cached"
if server["should_cache_results"]:
assert not result2.did_run(input1), "Input1 should have been cached"
assert not result2.did_run(input2), "Input2 should have been cached"
else:
assert result2.did_run(input1), "Input1 should have been rerun"
assert result2.did_run(input2), "Input2 should have been rerun"
def test_error(self, client: ComfyClient, builder: GraphBuilder):
g = builder
@ -411,7 +416,7 @@ class TestExecution:
input2 = g.node("StubImage", id="removeme", content="WHITE", height=512, width=512, batch_size=1)
client.run(g)
def test_custom_is_changed(self, client: ComfyClient, builder: GraphBuilder):
def test_custom_is_changed(self, client: ComfyClient, builder: GraphBuilder, server):
g = builder
# Creating the nodes in this specific order previously caused a bug
save = g.node("SaveImage")
@ -427,7 +432,10 @@ class TestExecution:
result3 = client.run(g)
result4 = client.run(g)
assert result1.did_run(is_changed), "is_changed should have been run"
assert not result2.did_run(is_changed), "is_changed should have been cached"
if server["should_cache_results"]:
assert not result2.did_run(is_changed), "is_changed should have been cached"
else:
assert result2.did_run(is_changed), "is_changed should have been re-run"
assert result3.did_run(is_changed), "is_changed should have been re-run"
assert result4.did_run(is_changed), "is_changed should not have been cached"
@ -514,7 +522,7 @@ class TestExecution:
assert len(images2) == 1, "Should have 1 image"
# This tests that only constant outputs are used in the call to `IS_CHANGED`
def test_is_changed_with_outputs(self, client: ComfyClient, builder: GraphBuilder):
def test_is_changed_with_outputs(self, client: ComfyClient, builder: GraphBuilder, server):
g = builder
input1 = g.node("StubConstantImage", value=0.5, height=512, width=512, batch_size=1)
test_node = g.node("TestIsChangedWithConstants", image=input1.out(0), value=0.5)
@ -530,7 +538,11 @@ class TestExecution:
images = result.get_images(output)
assert len(images) == 1, "Should have 1 image"
assert numpy.array(images[0]).min() == 63 and numpy.array(images[0]).max() == 63, "Image should have value 0.25"
assert not result.did_run(test_node), "The execution should have been cached"
if server["should_cache_results"]:
assert not result.did_run(test_node), "The execution should have been cached"
else:
assert result.did_run(test_node), "The execution should have been re-run"
def test_parallel_sleep_nodes(self, client: ComfyClient, builder: GraphBuilder, skip_timing_checks):
# Warmup execution to ensure server is fully initialized