mirror of
https://git.datalinker.icu/comfyanonymous/ComfyUI
synced 2025-12-13 16:04:34 +08:00
convert nodes_lt.py to V3 schema (#10084)
This commit is contained in:
parent
11bab7be76
commit
d9c0a4053d
@ -1,4 +1,3 @@
|
|||||||
import io
|
|
||||||
import nodes
|
import nodes
|
||||||
import node_helpers
|
import node_helpers
|
||||||
import torch
|
import torch
|
||||||
@ -8,46 +7,60 @@ import comfy.utils
|
|||||||
import math
|
import math
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import av
|
import av
|
||||||
|
from io import BytesIO
|
||||||
|
from typing_extensions import override
|
||||||
from comfy.ldm.lightricks.symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords
|
from comfy.ldm.lightricks.symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords
|
||||||
|
from comfy_api.latest import ComfyExtension, io
|
||||||
|
|
||||||
class EmptyLTXVLatentVideo:
|
class EmptyLTXVLatentVideo(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required": { "width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
|
return io.Schema(
|
||||||
"height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
|
node_id="EmptyLTXVLatentVideo",
|
||||||
"length": ("INT", {"default": 97, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 8}),
|
category="latent/video/ltxv",
|
||||||
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
|
inputs=[
|
||||||
RETURN_TYPES = ("LATENT",)
|
io.Int.Input("width", default=768, min=64, max=nodes.MAX_RESOLUTION, step=32),
|
||||||
FUNCTION = "generate"
|
io.Int.Input("height", default=512, min=64, max=nodes.MAX_RESOLUTION, step=32),
|
||||||
|
io.Int.Input("length", default=97, min=1, max=nodes.MAX_RESOLUTION, step=8),
|
||||||
|
io.Int.Input("batch_size", default=1, min=1, max=4096),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Latent.Output(),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
CATEGORY = "latent/video/ltxv"
|
@classmethod
|
||||||
|
def execute(cls, width, height, length, batch_size=1) -> io.NodeOutput:
|
||||||
def generate(self, width, height, length, batch_size=1):
|
|
||||||
latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())
|
latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())
|
||||||
return ({"samples": latent}, )
|
return io.NodeOutput({"samples": latent})
|
||||||
|
|
||||||
|
|
||||||
class LTXVImgToVideo:
|
class LTXVImgToVideo(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required": {"positive": ("CONDITIONING", ),
|
return io.Schema(
|
||||||
"negative": ("CONDITIONING", ),
|
node_id="LTXVImgToVideo",
|
||||||
"vae": ("VAE",),
|
category="conditioning/video_models",
|
||||||
"image": ("IMAGE",),
|
inputs=[
|
||||||
"width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
|
io.Conditioning.Input("positive"),
|
||||||
"height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
|
io.Conditioning.Input("negative"),
|
||||||
"length": ("INT", {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8}),
|
io.Vae.Input("vae"),
|
||||||
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
|
io.Image.Input("image"),
|
||||||
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0}),
|
io.Int.Input("width", default=768, min=64, max=nodes.MAX_RESOLUTION, step=32),
|
||||||
}}
|
io.Int.Input("height", default=512, min=64, max=nodes.MAX_RESOLUTION, step=32),
|
||||||
|
io.Int.Input("length", default=97, min=9, max=nodes.MAX_RESOLUTION, step=8),
|
||||||
|
io.Int.Input("batch_size", default=1, min=1, max=4096),
|
||||||
|
io.Float.Input("strength", default=1.0, min=0.0, max=1.0),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Conditioning.Output(display_name="positive"),
|
||||||
|
io.Conditioning.Output(display_name="negative"),
|
||||||
|
io.Latent.Output(display_name="latent"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
|
@classmethod
|
||||||
RETURN_NAMES = ("positive", "negative", "latent")
|
def execute(cls, positive, negative, image, vae, width, height, length, batch_size, strength) -> io.NodeOutput:
|
||||||
|
|
||||||
CATEGORY = "conditioning/video_models"
|
|
||||||
FUNCTION = "generate"
|
|
||||||
|
|
||||||
def generate(self, positive, negative, image, vae, width, height, length, batch_size, strength):
|
|
||||||
pixels = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
|
pixels = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
|
||||||
encode_pixels = pixels[:, :, :, :3]
|
encode_pixels = pixels[:, :, :, :3]
|
||||||
t = vae.encode(encode_pixels)
|
t = vae.encode(encode_pixels)
|
||||||
@ -62,7 +75,7 @@ class LTXVImgToVideo:
|
|||||||
)
|
)
|
||||||
conditioning_latent_frames_mask[:, :, :t.shape[2]] = 1.0 - strength
|
conditioning_latent_frames_mask[:, :, :t.shape[2]] = 1.0 - strength
|
||||||
|
|
||||||
return (positive, negative, {"samples": latent, "noise_mask": conditioning_latent_frames_mask}, )
|
return io.NodeOutput(positive, negative, {"samples": latent, "noise_mask": conditioning_latent_frames_mask})
|
||||||
|
|
||||||
|
|
||||||
def conditioning_get_any_value(conditioning, key, default=None):
|
def conditioning_get_any_value(conditioning, key, default=None):
|
||||||
@ -93,35 +106,46 @@ def get_keyframe_idxs(cond):
|
|||||||
num_keyframes = torch.unique(keyframe_idxs[:, 0]).shape[0]
|
num_keyframes = torch.unique(keyframe_idxs[:, 0]).shape[0]
|
||||||
return keyframe_idxs, num_keyframes
|
return keyframe_idxs, num_keyframes
|
||||||
|
|
||||||
class LTXVAddGuide:
|
class LTXVAddGuide(io.ComfyNode):
|
||||||
|
NUM_PREFIX_FRAMES = 2
|
||||||
|
PATCHIFIER = SymmetricPatchifier(1)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required": {"positive": ("CONDITIONING", ),
|
return io.Schema(
|
||||||
"negative": ("CONDITIONING", ),
|
node_id="LTXVAddGuide",
|
||||||
"vae": ("VAE",),
|
category="conditioning/video_models",
|
||||||
"latent": ("LATENT",),
|
inputs=[
|
||||||
"image": ("IMAGE", {"tooltip": "Image or video to condition the latent video on. Must be 8*n + 1 frames."
|
io.Conditioning.Input("positive"),
|
||||||
"If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames."}),
|
io.Conditioning.Input("negative"),
|
||||||
"frame_idx": ("INT", {"default": 0, "min": -9999, "max": 9999,
|
io.Vae.Input("vae"),
|
||||||
"tooltip": "Frame index to start the conditioning at. For single-frame images or "
|
io.Latent.Input("latent"),
|
||||||
"videos with 1-8 frames, any frame_idx value is acceptable. For videos with 9+ "
|
io.Image.Input(
|
||||||
"frames, frame_idx must be divisible by 8, otherwise it will be rounded down to "
|
"image",
|
||||||
"the nearest multiple of 8. Negative values are counted from the end of the video."}),
|
tooltip="Image or video to condition the latent video on. Must be 8*n + 1 frames. "
|
||||||
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
|
"If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames.",
|
||||||
}
|
),
|
||||||
}
|
io.Int.Input(
|
||||||
|
"frame_idx",
|
||||||
|
default=0,
|
||||||
|
min=-9999,
|
||||||
|
max=9999,
|
||||||
|
tooltip="Frame index to start the conditioning at. "
|
||||||
|
"For single-frame images or videos with 1-8 frames, any frame_idx value is acceptable. "
|
||||||
|
"For videos with 9+ frames, frame_idx must be divisible by 8, otherwise it will be rounded "
|
||||||
|
"down to the nearest multiple of 8. Negative values are counted from the end of the video.",
|
||||||
|
),
|
||||||
|
io.Float.Input("strength", default=1.0, min=0.0, max=1.0, step=0.01),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Conditioning.Output(display_name="positive"),
|
||||||
|
io.Conditioning.Output(display_name="negative"),
|
||||||
|
io.Latent.Output(display_name="latent"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
|
@classmethod
|
||||||
RETURN_NAMES = ("positive", "negative", "latent")
|
def encode(cls, vae, latent_width, latent_height, images, scale_factors):
|
||||||
|
|
||||||
CATEGORY = "conditioning/video_models"
|
|
||||||
FUNCTION = "generate"
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self._num_prefix_frames = 2
|
|
||||||
self._patchifier = SymmetricPatchifier(1)
|
|
||||||
|
|
||||||
def encode(self, vae, latent_width, latent_height, images, scale_factors):
|
|
||||||
time_scale_factor, width_scale_factor, height_scale_factor = scale_factors
|
time_scale_factor, width_scale_factor, height_scale_factor = scale_factors
|
||||||
images = images[:(images.shape[0] - 1) // time_scale_factor * time_scale_factor + 1]
|
images = images[:(images.shape[0] - 1) // time_scale_factor * time_scale_factor + 1]
|
||||||
pixels = comfy.utils.common_upscale(images.movedim(-1, 1), latent_width * width_scale_factor, latent_height * height_scale_factor, "bilinear", crop="disabled").movedim(1, -1)
|
pixels = comfy.utils.common_upscale(images.movedim(-1, 1), latent_width * width_scale_factor, latent_height * height_scale_factor, "bilinear", crop="disabled").movedim(1, -1)
|
||||||
@ -129,7 +153,8 @@ class LTXVAddGuide:
|
|||||||
t = vae.encode(encode_pixels)
|
t = vae.encode(encode_pixels)
|
||||||
return encode_pixels, t
|
return encode_pixels, t
|
||||||
|
|
||||||
def get_latent_index(self, cond, latent_length, guide_length, frame_idx, scale_factors):
|
@classmethod
|
||||||
|
def get_latent_index(cls, cond, latent_length, guide_length, frame_idx, scale_factors):
|
||||||
time_scale_factor, _, _ = scale_factors
|
time_scale_factor, _, _ = scale_factors
|
||||||
_, num_keyframes = get_keyframe_idxs(cond)
|
_, num_keyframes = get_keyframe_idxs(cond)
|
||||||
latent_count = latent_length - num_keyframes
|
latent_count = latent_length - num_keyframes
|
||||||
@ -141,9 +166,10 @@ class LTXVAddGuide:
|
|||||||
|
|
||||||
return frame_idx, latent_idx
|
return frame_idx, latent_idx
|
||||||
|
|
||||||
def add_keyframe_index(self, cond, frame_idx, guiding_latent, scale_factors):
|
@classmethod
|
||||||
|
def add_keyframe_index(cls, cond, frame_idx, guiding_latent, scale_factors):
|
||||||
keyframe_idxs, _ = get_keyframe_idxs(cond)
|
keyframe_idxs, _ = get_keyframe_idxs(cond)
|
||||||
_, latent_coords = self._patchifier.patchify(guiding_latent)
|
_, latent_coords = cls.PATCHIFIER.patchify(guiding_latent)
|
||||||
pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, causal_fix=frame_idx == 0) # we need the causal fix only if we're placing the new latents at index 0
|
pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, causal_fix=frame_idx == 0) # we need the causal fix only if we're placing the new latents at index 0
|
||||||
pixel_coords[:, 0] += frame_idx
|
pixel_coords[:, 0] += frame_idx
|
||||||
if keyframe_idxs is None:
|
if keyframe_idxs is None:
|
||||||
@ -152,8 +178,9 @@ class LTXVAddGuide:
|
|||||||
keyframe_idxs = torch.cat([keyframe_idxs, pixel_coords], dim=2)
|
keyframe_idxs = torch.cat([keyframe_idxs, pixel_coords], dim=2)
|
||||||
return node_helpers.conditioning_set_values(cond, {"keyframe_idxs": keyframe_idxs})
|
return node_helpers.conditioning_set_values(cond, {"keyframe_idxs": keyframe_idxs})
|
||||||
|
|
||||||
def append_keyframe(self, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors):
|
@classmethod
|
||||||
_, latent_idx = self.get_latent_index(
|
def append_keyframe(cls, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors):
|
||||||
|
_, latent_idx = cls.get_latent_index(
|
||||||
cond=positive,
|
cond=positive,
|
||||||
latent_length=latent_image.shape[2],
|
latent_length=latent_image.shape[2],
|
||||||
guide_length=guiding_latent.shape[2],
|
guide_length=guiding_latent.shape[2],
|
||||||
@ -162,8 +189,8 @@ class LTXVAddGuide:
|
|||||||
)
|
)
|
||||||
noise_mask[:, :, latent_idx:latent_idx + guiding_latent.shape[2]] = 1.0
|
noise_mask[:, :, latent_idx:latent_idx + guiding_latent.shape[2]] = 1.0
|
||||||
|
|
||||||
positive = self.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors)
|
positive = cls.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors)
|
||||||
negative = self.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors)
|
negative = cls.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors)
|
||||||
|
|
||||||
mask = torch.full(
|
mask = torch.full(
|
||||||
(noise_mask.shape[0], 1, guiding_latent.shape[2], noise_mask.shape[3], noise_mask.shape[4]),
|
(noise_mask.shape[0], 1, guiding_latent.shape[2], noise_mask.shape[3], noise_mask.shape[4]),
|
||||||
@ -176,7 +203,8 @@ class LTXVAddGuide:
|
|||||||
noise_mask = torch.cat([noise_mask, mask], dim=2)
|
noise_mask = torch.cat([noise_mask, mask], dim=2)
|
||||||
return positive, negative, latent_image, noise_mask
|
return positive, negative, latent_image, noise_mask
|
||||||
|
|
||||||
def replace_latent_frames(self, latent_image, noise_mask, guiding_latent, latent_idx, strength):
|
@classmethod
|
||||||
|
def replace_latent_frames(cls, latent_image, noise_mask, guiding_latent, latent_idx, strength):
|
||||||
cond_length = guiding_latent.shape[2]
|
cond_length = guiding_latent.shape[2]
|
||||||
assert latent_image.shape[2] >= latent_idx + cond_length, "Conditioning frames exceed the length of the latent sequence."
|
assert latent_image.shape[2] >= latent_idx + cond_length, "Conditioning frames exceed the length of the latent sequence."
|
||||||
|
|
||||||
@ -195,20 +223,21 @@ class LTXVAddGuide:
|
|||||||
|
|
||||||
return latent_image, noise_mask
|
return latent_image, noise_mask
|
||||||
|
|
||||||
def generate(self, positive, negative, vae, latent, image, frame_idx, strength):
|
@classmethod
|
||||||
|
def execute(cls, positive, negative, vae, latent, image, frame_idx, strength) -> io.NodeOutput:
|
||||||
scale_factors = vae.downscale_index_formula
|
scale_factors = vae.downscale_index_formula
|
||||||
latent_image = latent["samples"]
|
latent_image = latent["samples"]
|
||||||
noise_mask = get_noise_mask(latent)
|
noise_mask = get_noise_mask(latent)
|
||||||
|
|
||||||
_, _, latent_length, latent_height, latent_width = latent_image.shape
|
_, _, latent_length, latent_height, latent_width = latent_image.shape
|
||||||
image, t = self.encode(vae, latent_width, latent_height, image, scale_factors)
|
image, t = cls.encode(vae, latent_width, latent_height, image, scale_factors)
|
||||||
|
|
||||||
frame_idx, latent_idx = self.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
|
frame_idx, latent_idx = cls.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
|
||||||
assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."
|
assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."
|
||||||
|
|
||||||
num_prefix_frames = min(self._num_prefix_frames, t.shape[2])
|
num_prefix_frames = min(cls.NUM_PREFIX_FRAMES, t.shape[2])
|
||||||
|
|
||||||
positive, negative, latent_image, noise_mask = self.append_keyframe(
|
positive, negative, latent_image, noise_mask = cls.append_keyframe(
|
||||||
positive,
|
positive,
|
||||||
negative,
|
negative,
|
||||||
frame_idx,
|
frame_idx,
|
||||||
@ -223,9 +252,9 @@ class LTXVAddGuide:
|
|||||||
|
|
||||||
t = t[:, :, num_prefix_frames:]
|
t = t[:, :, num_prefix_frames:]
|
||||||
if t.shape[2] == 0:
|
if t.shape[2] == 0:
|
||||||
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
|
return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})
|
||||||
|
|
||||||
latent_image, noise_mask = self.replace_latent_frames(
|
latent_image, noise_mask = cls.replace_latent_frames(
|
||||||
latent_image,
|
latent_image,
|
||||||
noise_mask,
|
noise_mask,
|
||||||
t,
|
t,
|
||||||
@ -233,34 +262,35 @@ class LTXVAddGuide:
|
|||||||
strength,
|
strength,
|
||||||
)
|
)
|
||||||
|
|
||||||
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
|
return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})
|
||||||
|
|
||||||
|
|
||||||
class LTXVCropGuides:
|
class LTXVCropGuides(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required": {"positive": ("CONDITIONING", ),
|
return io.Schema(
|
||||||
"negative": ("CONDITIONING", ),
|
node_id="LTXVCropGuides",
|
||||||
"latent": ("LATENT",),
|
category="conditioning/video_models",
|
||||||
}
|
inputs=[
|
||||||
}
|
io.Conditioning.Input("positive"),
|
||||||
|
io.Conditioning.Input("negative"),
|
||||||
|
io.Latent.Input("latent"),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Conditioning.Output(display_name="positive"),
|
||||||
|
io.Conditioning.Output(display_name="negative"),
|
||||||
|
io.Latent.Output(display_name="latent"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
|
@classmethod
|
||||||
RETURN_NAMES = ("positive", "negative", "latent")
|
def execute(cls, positive, negative, latent) -> io.NodeOutput:
|
||||||
|
|
||||||
CATEGORY = "conditioning/video_models"
|
|
||||||
FUNCTION = "crop"
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self._patchifier = SymmetricPatchifier(1)
|
|
||||||
|
|
||||||
def crop(self, positive, negative, latent):
|
|
||||||
latent_image = latent["samples"].clone()
|
latent_image = latent["samples"].clone()
|
||||||
noise_mask = get_noise_mask(latent)
|
noise_mask = get_noise_mask(latent)
|
||||||
|
|
||||||
_, num_keyframes = get_keyframe_idxs(positive)
|
_, num_keyframes = get_keyframe_idxs(positive)
|
||||||
if num_keyframes == 0:
|
if num_keyframes == 0:
|
||||||
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
|
return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
|
||||||
|
|
||||||
latent_image = latent_image[:, :, :-num_keyframes]
|
latent_image = latent_image[:, :, :-num_keyframes]
|
||||||
noise_mask = noise_mask[:, :, :-num_keyframes]
|
noise_mask = noise_mask[:, :, :-num_keyframes]
|
||||||
@ -268,44 +298,52 @@ class LTXVCropGuides:
|
|||||||
positive = node_helpers.conditioning_set_values(positive, {"keyframe_idxs": None})
|
positive = node_helpers.conditioning_set_values(positive, {"keyframe_idxs": None})
|
||||||
negative = node_helpers.conditioning_set_values(negative, {"keyframe_idxs": None})
|
negative = node_helpers.conditioning_set_values(negative, {"keyframe_idxs": None})
|
||||||
|
|
||||||
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
|
return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})
|
||||||
|
|
||||||
|
|
||||||
class LTXVConditioning:
|
class LTXVConditioning(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required": {"positive": ("CONDITIONING", ),
|
return io.Schema(
|
||||||
"negative": ("CONDITIONING", ),
|
node_id="LTXVConditioning",
|
||||||
"frame_rate": ("FLOAT", {"default": 25.0, "min": 0.0, "max": 1000.0, "step": 0.01}),
|
category="conditioning/video_models",
|
||||||
}}
|
inputs=[
|
||||||
RETURN_TYPES = ("CONDITIONING", "CONDITIONING")
|
io.Conditioning.Input("positive"),
|
||||||
RETURN_NAMES = ("positive", "negative")
|
io.Conditioning.Input("negative"),
|
||||||
FUNCTION = "append"
|
io.Float.Input("frame_rate", default=25.0, min=0.0, max=1000.0, step=0.01),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Conditioning.Output(display_name="positive"),
|
||||||
|
io.Conditioning.Output(display_name="negative"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
CATEGORY = "conditioning/video_models"
|
@classmethod
|
||||||
|
def execute(cls, positive, negative, frame_rate) -> io.NodeOutput:
|
||||||
def append(self, positive, negative, frame_rate):
|
|
||||||
positive = node_helpers.conditioning_set_values(positive, {"frame_rate": frame_rate})
|
positive = node_helpers.conditioning_set_values(positive, {"frame_rate": frame_rate})
|
||||||
negative = node_helpers.conditioning_set_values(negative, {"frame_rate": frame_rate})
|
negative = node_helpers.conditioning_set_values(negative, {"frame_rate": frame_rate})
|
||||||
return (positive, negative)
|
return io.NodeOutput(positive, negative)
|
||||||
|
|
||||||
|
|
||||||
class ModelSamplingLTXV:
|
class ModelSamplingLTXV(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required": { "model": ("MODEL",),
|
return io.Schema(
|
||||||
"max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),
|
node_id="ModelSamplingLTXV",
|
||||||
"base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),
|
category="advanced/model",
|
||||||
},
|
inputs=[
|
||||||
"optional": {"latent": ("LATENT",), }
|
io.Model.Input("model"),
|
||||||
}
|
io.Float.Input("max_shift", default=2.05, min=0.0, max=100.0, step=0.01),
|
||||||
|
io.Float.Input("base_shift", default=0.95, min=0.0, max=100.0, step=0.01),
|
||||||
|
io.Latent.Input("latent", optional=True),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Model.Output(),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
RETURN_TYPES = ("MODEL",)
|
@classmethod
|
||||||
FUNCTION = "patch"
|
def execute(cls, model, max_shift, base_shift, latent=None) -> io.NodeOutput:
|
||||||
|
|
||||||
CATEGORY = "advanced/model"
|
|
||||||
|
|
||||||
def patch(self, model, max_shift, base_shift, latent=None):
|
|
||||||
m = model.clone()
|
m = model.clone()
|
||||||
|
|
||||||
if latent is None:
|
if latent is None:
|
||||||
@ -329,37 +367,41 @@ class ModelSamplingLTXV:
|
|||||||
model_sampling.set_parameters(shift=shift)
|
model_sampling.set_parameters(shift=shift)
|
||||||
m.add_object_patch("model_sampling", model_sampling)
|
m.add_object_patch("model_sampling", model_sampling)
|
||||||
|
|
||||||
return (m, )
|
return io.NodeOutput(m)
|
||||||
|
|
||||||
|
|
||||||
class LTXVScheduler:
|
class LTXVScheduler(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required":
|
return io.Schema(
|
||||||
{"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
|
node_id="LTXVScheduler",
|
||||||
"max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),
|
category="sampling/custom_sampling/schedulers",
|
||||||
"base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),
|
inputs=[
|
||||||
"stretch": ("BOOLEAN", {
|
io.Int.Input("steps", default=20, min=1, max=10000),
|
||||||
"default": True,
|
io.Float.Input("max_shift", default=2.05, min=0.0, max=100.0, step=0.01),
|
||||||
"tooltip": "Stretch the sigmas to be in the range [terminal, 1]."
|
io.Float.Input("base_shift", default=0.95, min=0.0, max=100.0, step=0.01),
|
||||||
}),
|
io.Boolean.Input(
|
||||||
"terminal": (
|
id="stretch",
|
||||||
"FLOAT",
|
default=True,
|
||||||
{
|
tooltip="Stretch the sigmas to be in the range [terminal, 1].",
|
||||||
"default": 0.1, "min": 0.0, "max": 0.99, "step": 0.01,
|
),
|
||||||
"tooltip": "The terminal value of the sigmas after stretching."
|
io.Float.Input(
|
||||||
},
|
id="terminal",
|
||||||
),
|
default=0.1,
|
||||||
},
|
min=0.0,
|
||||||
"optional": {"latent": ("LATENT",), }
|
max=0.99,
|
||||||
}
|
step=0.01,
|
||||||
|
tooltip="The terminal value of the sigmas after stretching.",
|
||||||
|
),
|
||||||
|
io.Latent.Input("latent", optional=True),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Sigmas.Output(),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
RETURN_TYPES = ("SIGMAS",)
|
@classmethod
|
||||||
CATEGORY = "sampling/custom_sampling/schedulers"
|
def execute(cls, steps, max_shift, base_shift, stretch, terminal, latent=None) -> io.NodeOutput:
|
||||||
|
|
||||||
FUNCTION = "get_sigmas"
|
|
||||||
|
|
||||||
def get_sigmas(self, steps, max_shift, base_shift, stretch, terminal, latent=None):
|
|
||||||
if latent is None:
|
if latent is None:
|
||||||
tokens = 4096
|
tokens = 4096
|
||||||
else:
|
else:
|
||||||
@ -389,7 +431,7 @@ class LTXVScheduler:
|
|||||||
stretched = 1.0 - (one_minus_z / scale_factor)
|
stretched = 1.0 - (one_minus_z / scale_factor)
|
||||||
sigmas[non_zero_mask] = stretched
|
sigmas[non_zero_mask] = stretched
|
||||||
|
|
||||||
return (sigmas,)
|
return io.NodeOutput(sigmas)
|
||||||
|
|
||||||
def encode_single_frame(output_file, image_array: np.ndarray, crf):
|
def encode_single_frame(output_file, image_array: np.ndarray, crf):
|
||||||
container = av.open(output_file, "w", format="mp4")
|
container = av.open(output_file, "w", format="mp4")
|
||||||
@ -423,52 +465,54 @@ def preprocess(image: torch.Tensor, crf=29):
|
|||||||
return image
|
return image
|
||||||
|
|
||||||
image_array = (image[:(image.shape[0] // 2) * 2, :(image.shape[1] // 2) * 2] * 255.0).byte().cpu().numpy()
|
image_array = (image[:(image.shape[0] // 2) * 2, :(image.shape[1] // 2) * 2] * 255.0).byte().cpu().numpy()
|
||||||
with io.BytesIO() as output_file:
|
with BytesIO() as output_file:
|
||||||
encode_single_frame(output_file, image_array, crf)
|
encode_single_frame(output_file, image_array, crf)
|
||||||
video_bytes = output_file.getvalue()
|
video_bytes = output_file.getvalue()
|
||||||
with io.BytesIO(video_bytes) as video_file:
|
with BytesIO(video_bytes) as video_file:
|
||||||
image_array = decode_single_frame(video_file)
|
image_array = decode_single_frame(video_file)
|
||||||
tensor = torch.tensor(image_array, dtype=image.dtype, device=image.device) / 255.0
|
tensor = torch.tensor(image_array, dtype=image.dtype, device=image.device) / 255.0
|
||||||
return tensor
|
return tensor
|
||||||
|
|
||||||
|
|
||||||
class LTXVPreprocess:
|
class LTXVPreprocess(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {
|
return io.Schema(
|
||||||
"required": {
|
node_id="LTXVPreprocess",
|
||||||
"image": ("IMAGE",),
|
category="image",
|
||||||
"img_compression": (
|
inputs=[
|
||||||
"INT",
|
io.Image.Input("image"),
|
||||||
{
|
io.Int.Input(
|
||||||
"default": 35,
|
id="img_compression", default=35, min=0, max=100, tooltip="Amount of compression to apply on image."
|
||||||
"min": 0,
|
|
||||||
"max": 100,
|
|
||||||
"tooltip": "Amount of compression to apply on image.",
|
|
||||||
},
|
|
||||||
),
|
),
|
||||||
}
|
],
|
||||||
}
|
outputs=[
|
||||||
|
io.Image.Output(display_name="output_image"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
FUNCTION = "preprocess"
|
@classmethod
|
||||||
RETURN_TYPES = ("IMAGE",)
|
def execute(cls, image, img_compression) -> io.NodeOutput:
|
||||||
RETURN_NAMES = ("output_image",)
|
|
||||||
CATEGORY = "image"
|
|
||||||
|
|
||||||
def preprocess(self, image, img_compression):
|
|
||||||
output_images = []
|
output_images = []
|
||||||
for i in range(image.shape[0]):
|
for i in range(image.shape[0]):
|
||||||
output_images.append(preprocess(image[i], img_compression))
|
output_images.append(preprocess(image[i], img_compression))
|
||||||
return (torch.stack(output_images),)
|
return io.NodeOutput(torch.stack(output_images))
|
||||||
|
|
||||||
|
|
||||||
NODE_CLASS_MAPPINGS = {
|
class LtxvExtension(ComfyExtension):
|
||||||
"EmptyLTXVLatentVideo": EmptyLTXVLatentVideo,
|
@override
|
||||||
"LTXVImgToVideo": LTXVImgToVideo,
|
async def get_node_list(self) -> list[type[io.ComfyNode]]:
|
||||||
"ModelSamplingLTXV": ModelSamplingLTXV,
|
return [
|
||||||
"LTXVConditioning": LTXVConditioning,
|
EmptyLTXVLatentVideo,
|
||||||
"LTXVScheduler": LTXVScheduler,
|
LTXVImgToVideo,
|
||||||
"LTXVAddGuide": LTXVAddGuide,
|
ModelSamplingLTXV,
|
||||||
"LTXVPreprocess": LTXVPreprocess,
|
LTXVConditioning,
|
||||||
"LTXVCropGuides": LTXVCropGuides,
|
LTXVScheduler,
|
||||||
}
|
LTXVAddGuide,
|
||||||
|
LTXVPreprocess,
|
||||||
|
LTXVCropGuides,
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
async def comfy_entrypoint() -> LtxvExtension:
|
||||||
|
return LtxvExtension()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user