mirror of https://git.datalinker.icu/kijai/ComfyUI-CogVideoXWrapper.git
synced 2025-12-08 20:34:23 +08:00
Add start/end percent to image_conds
This commit is contained in:
parent b9688f3cd2
commit 5cc570a467
nodes.py: 14 changed lines
@@ -221,6 +221,8 @@ class CogVideoImageEncode:
                 "enable_tiling": ("BOOLEAN", {"default": False, "tooltip": "Enable tiling for the VAE to reduce memory usage"}),
                 "noise_aug_strength": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001, "tooltip": "Augment image with noise"}),
                 "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}),
+                "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
             },
         }
@@ -229,7 +231,7 @@ class CogVideoImageEncode:
     FUNCTION = "encode"
     CATEGORY = "CogVideoWrapper"
 
-    def encode(self, vae, start_image, end_image=None, enable_tiling=False, noise_aug_strength=0.0, strength=1.0):
+    def encode(self, vae, start_image, end_image=None, enable_tiling=False, noise_aug_strength=0.0, strength=1.0, start_percent=0.0, end_percent=1.0):
         device = mm.get_torch_device()
         offload_device = mm.unet_offload_device()
         generator = torch.Generator(device=device).manual_seed(0)
@@ -277,7 +279,11 @@ class CogVideoImageEncode:
         log.info(f"Encoded latents shape: {final_latents.shape}")
         vae.to(offload_device)
 
-        return ({"samples": final_latents}, )
+        return ({
+            "samples": final_latents,
+            "start_percent": start_percent,
+            "end_percent": end_percent
+        }, )
 
 class CogVideoImageEncodeFunInP:
     @classmethod
@@ -608,6 +614,8 @@ class CogVideoSampler:
         if image_cond_latents is not None:
             assert supports_image_conds, "Image condition latents only supported for I2V and Interpolation models"
             image_conds = image_cond_latents["samples"]
+            image_cond_start_percent = image_cond_latents.get("start_percent", 0.0)
+            image_cond_end_percent = image_cond_latents.get("end_percent", 1.0)
             if "1.5" in model_name or "1_5" in model_name:
                 image_conds = image_conds / 0.7 # needed for 1.5 models
             else:
@@ -704,6 +712,8 @@ class CogVideoSampler:
             freenoise=context_options["freenoise"] if context_options is not None else None,
             controlnet=controlnet,
             tora=tora_trajectory if tora_trajectory is not None else None,
+            image_cond_start_percent=image_cond_start_percent,
+            image_cond_end_percent=image_cond_end_percent
         )
         if not model["cpu_offloading"] and model["manual_offloading"]:
             pipe.transformer.to(offload_device)
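For reference, a minimal sketch of the data flow the nodes.py changes add (the tensor shape is an arbitrary placeholder, and the snippet is an illustration rather than the node code itself): the encode node now returns its LATENT dict with two extra keys, and the sampler reads them back with .get(), so latent dicts produced before this change still fall back to the full 0.0 to 1.0 range.

import torch

# Arbitrary placeholder for the VAE-encoded image latents.
final_latents = torch.zeros(1, 2, 16, 60, 90)

# What CogVideoImageEncode now returns as its LATENT output:
image_cond_latents = {
    "samples": final_latents,
    "start_percent": 0.0,  # begin applying the image conditioning at 0% of the steps
    "end_percent": 0.5,    # stop applying it after 50% of the steps
}

# How CogVideoSampler reads it back; .get() keeps older latent dicts
# (which only carry "samples") working with the full-range defaults.
image_conds = image_cond_latents["samples"]
image_cond_start_percent = image_cond_latents.get("start_percent", 0.0)
image_cond_end_percent = image_cond_latents.get("end_percent", 1.0)

The remaining hunks below are in the pipeline's __call__ (class CogVideoXPipeline), where these two values gate the image conditioning at each denoising step.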
@@ -349,6 +349,8 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
         freenoise: Optional[bool] = True,
         controlnet: Optional[dict] = None,
         tora: Optional[dict] = None,
+        image_cond_start_percent: float = 0.0,
+        image_cond_end_percent: float = 1.0,
 
     ):
         """
@@ -708,7 +710,12 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
 
+                current_step_percentage = i / num_inference_steps
+
                 if image_cond_latents is not None:
-                    latent_image_input = torch.cat([image_cond_latents] * 2) if do_classifier_free_guidance else image_cond_latents
+                    if not image_cond_start_percent <= current_step_percentage <= image_cond_end_percent:
+                        latent_image_input = torch.zeros_like(latent_model_input)
+                    else:
+                        latent_image_input = torch.cat([image_cond_latents] * 2) if do_classifier_free_guidance else image_cond_latents
                     if fun_mask is not None: #for fun img2vid and interpolation
                         fun_inpaint_mask = torch.cat([fun_mask] * 2) if do_classifier_free_guidance else fun_mask
@@ -726,7 +733,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latent_model_input.shape[0])
 
-                current_step_percentage = i / num_inference_steps
+
 
                 if controlnet is not None:
                     controlnet_states = None
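To make the new window concrete, here is a small self-contained sketch of the same gating arithmetic (dummy values and made-up shapes, not the pipeline code itself): outside [start_percent, end_percent] the image-conditioning latents fed to the transformer are replaced with zeros for that step.

import torch

num_inference_steps = 10
image_cond_start_percent = 0.0
image_cond_end_percent = 0.5

# Dummy stand-ins with made-up shapes.
latent_model_input = torch.randn(2, 13, 16, 60, 90)
image_cond_latents = torch.randn(1, 13, 16, 60, 90)
do_classifier_free_guidance = True

for i in range(num_inference_steps):
    current_step_percentage = i / num_inference_steps
    if not image_cond_start_percent <= current_step_percentage <= image_cond_end_percent:
        # Outside the window: drop the image conditioning for this step.
        latent_image_input = torch.zeros_like(latent_model_input)
    else:
        latent_image_input = torch.cat([image_cond_latents] * 2) if do_classifier_free_guidance else image_cond_latents
    print(i, current_step_percentage, "zeroed" if latent_image_input.abs().sum() == 0 else "conditioned")

With these values, steps 0 through 5 (percentages 0.0 to 0.5; the window test is inclusive on both ends) keep the conditioning and steps 6 through 9 are zeroed. Since current_step_percentage is i / num_inference_steps, it never reaches 1.0, so the default end_percent of 1.0 leaves every step conditioned.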