Mirror of https://git.datalinker.icu/kijai/ComfyUI-CogVideoXWrapper.git (synced 2025-12-08 20:34:23 +08:00)
Add start/end percent to image_conds
commit 5cc570a467 (parent b9688f3cd2)
nodes.py | 14
@@ -221,6 +221,8 @@ class CogVideoImageEncode:
                 "enable_tiling": ("BOOLEAN", {"default": False, "tooltip": "Enable tiling for the VAE to reduce memory usage"}),
                 "noise_aug_strength": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001, "tooltip": "Augment image with noise"}),
                 "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}),
+                "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
             },
         }
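The two new node inputs default to 0.0 and 1.0, so the image conditioning stays active across the whole sampling schedule unless the window is narrowed. A small illustrative helper (not part of the repo) showing which step indices a given window covers, mirroring the current_step_percentage = i / num_inference_steps check added to the pipeline below:

# Hypothetical helper, not in the repo: lists the sampling steps that fall
# inside the [start_percent, end_percent] window the new inputs describe.
def steps_in_window(num_inference_steps, start_percent=0.0, end_percent=1.0):
    return [i for i in range(num_inference_steps)
            if start_percent <= i / num_inference_steps <= end_percent]

print(steps_in_window(50, 0.0, 0.5))   # steps 0..25 get the image conds
print(steps_in_window(50, 0.0, 1.0))   # the defaults: every step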
@@ -229,7 +231,7 @@ class CogVideoImageEncode:
     FUNCTION = "encode"
     CATEGORY = "CogVideoWrapper"

-    def encode(self, vae, start_image, end_image=None, enable_tiling=False, noise_aug_strength=0.0, strength=1.0):
+    def encode(self, vae, start_image, end_image=None, enable_tiling=False, noise_aug_strength=0.0, strength=1.0, start_percent=0.0, end_percent=1.0):
         device = mm.get_torch_device()
         offload_device = mm.unet_offload_device()
         generator = torch.Generator(device=device).manual_seed(0)
@@ -277,7 +279,11 @@ class CogVideoImageEncode:
         log.info(f"Encoded latents shape: {final_latents.shape}")
         vae.to(offload_device)

-        return ({"samples": final_latents}, )
+        return ({
+            "samples": final_latents,
+            "start_percent": start_percent,
+            "end_percent": end_percent
+        }, )

 class CogVideoImageEncodeFunInP:
     @classmethod
@@ -608,6 +614,8 @@ class CogVideoSampler:
         if image_cond_latents is not None:
             assert supports_image_conds, "Image condition latents only supported for I2V and Interpolation models"
             image_conds = image_cond_latents["samples"]
+            image_cond_start_percent = image_cond_latents.get("start_percent", 0.0)
+            image_cond_end_percent = image_cond_latents.get("end_percent", 1.0)
             if "1.5" in model_name or "1_5" in model_name:
                 image_conds = image_conds / 0.7 # needed for 1.5 models
             else:
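Because the sampler reads the window back with .get() and the same defaults, latent dicts produced by the previous version of the encode node (which carried only "samples") keep working and simply resolve to the full 0.0-1.0 window. A minimal sketch of that fallback, with placeholder values:

# Minimal sketch of the fallback behaviour; the latent values are placeholders.
old_style = {"samples": "latents"}  # pre-commit encode output: no window keys
new_style = {"samples": "latents", "start_percent": 0.2, "end_percent": 0.8}

for cond in (old_style, new_style):
    start = cond.get("start_percent", 0.0)
    end = cond.get("end_percent", 1.0)
    print(start, end)   # 0.0 1.0 for the old dict, 0.2 0.8 for the new one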
@@ -704,6 +712,8 @@ class CogVideoSampler:
            freenoise=context_options["freenoise"] if context_options is not None else None,
            controlnet=controlnet,
            tora=tora_trajectory if tora_trajectory is not None else None,
+            image_cond_start_percent=image_cond_start_percent,
+            image_cond_end_percent=image_cond_end_percent
         )
         if not model["cpu_offloading"] and model["manual_offloading"]:
             pipe.transformer.to(offload_device)
@@ -349,6 +349,8 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
         freenoise: Optional[bool] = True,
         controlnet: Optional[dict] = None,
         tora: Optional[dict] = None,
+        image_cond_start_percent: float = 0.0,
+        image_cond_end_percent: float = 1.0,

     ):
         """
@@ -708,8 +710,13 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

+                current_step_percentage = i / num_inference_steps
+
                 if image_cond_latents is not None:
-                    latent_image_input = torch.cat([image_cond_latents] * 2) if do_classifier_free_guidance else image_cond_latents
+                    if not image_cond_start_percent <= current_step_percentage <= image_cond_end_percent:
+                        latent_image_input = torch.zeros_like(latent_model_input)
+                    else:
+                        latent_image_input = torch.cat([image_cond_latents] * 2) if do_classifier_free_guidance else image_cond_latents
                     if fun_mask is not None: #for fun img2vid and interpolation
                         fun_inpaint_mask = torch.cat([fun_mask] * 2) if do_classifier_free_guidance else fun_mask
                         masks_input = torch.cat([fun_inpaint_mask, latent_image_input], dim=2)
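This hunk is the core of the change: each denoising step computes how far it is through the schedule and only feeds the image-conditioning latents to the transformer while that fraction lies inside [image_cond_start_percent, image_cond_end_percent]; outside the window the conditioning input is zeroed. A standalone sketch of the same gating with dummy tensors (shapes and variable values are assumptions, not the pipeline's real state):

import torch

# Dummy stand-ins for the pipeline state; shapes are arbitrary.
num_inference_steps = 4
do_classifier_free_guidance = True
latents = torch.randn(1, 2, 4, 8, 8)
image_cond_latents = torch.randn(1, 2, 4, 8, 8)
start_percent, end_percent = 0.25, 0.75

for i in range(num_inference_steps):
    latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
    current_step_percentage = i / num_inference_steps
    if not start_percent <= current_step_percentage <= end_percent:
        # outside the window: zeros, so the image conditioning has no effect
        latent_image_input = torch.zeros_like(latent_model_input)
    else:
        latent_image_input = torch.cat([image_cond_latents] * 2) if do_classifier_free_guidance else image_cond_latents
    print(f"step {i}: {current_step_percentage:.2f} -> conditioned={latent_image_input.abs().sum().item() > 0}")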
@@ -726,7 +733,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latent_model_input.shape[0])

-                current_step_percentage = i / num_inference_steps
+
                 if controlnet is not None:
                     controlnet_states = None