Mirror of https://git.datalinker.icu/kijai/ComfyUI-CogVideoXWrapper.git
Add CogVideoX-Fun-V1.1-5b-Control
https://huggingface.co/alibaba-pai/CogVideoX-Fun-V1.1-5b-Control
parent c9efefe736
commit ecd067260c
@@ -108,6 +108,7 @@ class DownloadAndLoadCogVideoModel:
                 "alibaba-pai/CogVideoX-Fun-V1.1-5b-InP",
                 "alibaba-pai/CogVideoX-Fun-V1.1-2b-Pose",
                 "alibaba-pai/CogVideoX-Fun-V1.1-5b-Pose",
+                "alibaba-pai/CogVideoX-Fun-V1.1-5b-Control",
                 "feizhengcong/CogvideoX-Interpolation",
                 "NimVideo/cogvideox-2b-img2vid"
             ],
@@ -233,7 +234,7 @@ class DownloadAndLoadCogVideoModel:
             transformer,
             scheduler,
             dtype=dtype,
-            is_fun_inpaint=True if "fun" in model.lower() and "pose" not in model.lower() else False
+            is_fun_inpaint="fun" in model.lower() and not ("pose" in model.lower() or "control" in model.lower())
         )
         if "cogvideox-2b-img2vid" in model:
             pipe.input_with_padding = False
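For context, the only model IDs where the old and new `is_fun_inpaint` expressions disagree are the Fun Control checkpoints added in this commit. A minimal sanity-check sketch, assuming nothing beyond the two boolean expressions above (the standalone helper functions and the model-ID list are illustrative only):

# Hypothetical helpers reproducing the old and new is_fun_inpaint expressions from the diff above.
def is_fun_inpaint_old(model: str) -> bool:
    m = model.lower()
    return True if "fun" in m and "pose" not in m else False

def is_fun_inpaint_new(model: str) -> bool:
    m = model.lower()
    return "fun" in m and not ("pose" in m or "control" in m)

for model_id in (
    "alibaba-pai/CogVideoX-Fun-V1.1-5b-InP",      # old: True,  new: True  (inpaint handling kept)
    "alibaba-pai/CogVideoX-Fun-V1.1-5b-Pose",     # old: False, new: False
    "alibaba-pai/CogVideoX-Fun-V1.1-5b-Control",  # old: True,  new: False (the fix)
    "feizhengcong/CogvideoX-Interpolation",       # old: False, new: False (not a Fun model)
):
    print(model_id, is_fun_inpaint_old(model_id), is_fun_inpaint_new(model_id))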
@@ -255,7 +256,6 @@ class DownloadAndLoadCogVideoModel:
                 adapter_weight = l['strength']
                 pipe.load_lora_weights(l['path'], weight_name=l['path'].split("/")[-1], lora_rank=lora_rank, adapter_name=adapter_name)

-                #transformer = load_lora_into_transformer(lora, transformer)
                 adapter_list.append(adapter_name)
                 adapter_weights.append(adapter_weight)
             for l in lora:
@@ -549,7 +549,12 @@ class DownloadAndLoadCogVideoGGUFModel:
         vae = AutoencoderKLCogVideoX.from_config(vae_config).to(vae_dtype).to(offload_device)
         vae.load_state_dict(vae_sd)
         del vae_sd
-        pipe = CogVideoXPipeline(transformer, scheduler, dtype=vae_dtype)
+        pipe = CogVideoXPipeline(
+            transformer,
+            scheduler,
+            dtype=vae_dtype,
+            is_fun_inpaint="fun" in model.lower() and not ("pose" in model.lower() or "control" in model.lower())
+        )

         if enable_sequential_cpu_offload:
             pipe.enable_sequential_cpu_offload()
@@ -675,7 +680,6 @@ class CogVideoXModelLoader:
                 set_module_tensor_to_device(transformer, name, device=transformer_load_device, dtype=base_dtype, value=sd[name])
         del sd

-
         #scheduler
         with open(scheduler_config_path) as f:
             scheduler_config = json.load(f)
@@ -692,14 +696,12 @@ class CogVideoXModelLoader:
                 module.fuse_projections(fuse=True)
         transformer.attention_mode = attention_mode

-        if "fun" in model_type:
-            if not "pose" in model_type:
-                raise NotImplementedError("Fun models besides pose are not supported with this loader yet")
-                pipe = CogVideoX_Fun_Pipeline_Inpaint(vae, transformer, scheduler)
-            else:
-                pipe = CogVideoXPipeline(transformer, scheduler, dtype=base_dtype)
-        else:
-            pipe = CogVideoXPipeline(transformer, scheduler, dtype=base_dtype)
+        pipe = CogVideoXPipeline(
+            transformer,
+            scheduler,
+            dtype=base_dtype,
+            is_fun_inpaint="fun" in model.lower() and not ("pose" in model.lower() or "control" in model.lower())
+        )

         if enable_sequential_cpu_offload:
             pipe.enable_sequential_cpu_offload()
@@ -796,11 +798,6 @@ class CogVideoXModelLoader:

             manual_offloading = False # to disable manual .to(device) calls
             log.info(f"Quantized transformer blocks to {quantization}")

-        # if load_device == "offload_device":
-        #     pipe.transformer.to(offload_device)
-        # else:
-        #     pipe.transformer.to(device)
-
         pipeline = {
             "pipe": pipe,
@@ -812,7 +809,6 @@ class CogVideoXModelLoader:
             "model_name": model,
             "manual_offloading": manual_offloading,
         }

         return (pipeline,)
-
 #region VAE
nodes.py (10 changed lines)
@@ -343,13 +343,10 @@ class CogVideoImageEncodeFunInP:

         bs = 1
         new_mask_pixel_values = []
-        print("input_image shape: ",input_image.shape)
         for i in range(0, input_image.shape[0], bs):
             mask_pixel_values_bs = input_image[i : i + bs]
             mask_pixel_values_bs = vae.encode(mask_pixel_values_bs)[0]
-            print("mask_pixel_values_bs: ",mask_pixel_values_bs.parameters.shape)
             mask_pixel_values_bs = mask_pixel_values_bs.mode()
-            print("mask_pixel_values_bs: ",mask_pixel_values_bs.shape, mask_pixel_values_bs.min(), mask_pixel_values_bs.max())
             new_mask_pixel_values.append(mask_pixel_values_bs)
         masked_image_latents = torch.cat(new_mask_pixel_values, dim = 0)
         masked_image_latents = masked_image_latents.permute(0, 2, 1, 3, 4) # B, T, C, H, W
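The loop above VAE-encodes the masked input in batches of size bs, takes the mode of each latent distribution, concatenates the pieces along the batch dimension, and then reorders the axes. A shape-only illustration of that final permute; the concrete latent shape below is an assumption chosen for illustration, not taken from the code:

import torch

# Assumed latent layout before the permute: [B, C, T, H, W]
masked_image_latents = torch.zeros(1, 16, 13, 60, 90)
masked_image_latents = masked_image_latents.permute(0, 2, 1, 3, 4)  # -> B, T, C, H, W
print(masked_image_latents.shape)  # torch.Size([1, 13, 16, 60, 90])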
@@ -601,8 +598,7 @@ class CogVideoSampler:

         model_name = model.get("model_name", "")
         supports_image_conds = True if "I2V" in model_name or "interpolation" in model_name.lower() or "fun" in model_name.lower() else False
-
-        if "fun" in model_name.lower() and "pose" not in model_name.lower() and image_cond_latents is not None:
+        if "fun" in model_name.lower() and not ("pose" in model_name.lower() or "control" in model_name.lower()) and image_cond_latents is not None:
             assert image_cond_latents["mask"] is not None, "For fun inpaint models use CogVideoImageEncodeFunInP"
             fun_mask = image_cond_latents["mask"]
         else:
@@ -855,8 +851,8 @@ class CogVideoXFunResizeToClosestBucket:
         from .cogvideox_fun.utils import ASPECT_RATIO_512, get_closest_ratio

         B, H, W, C = images.shape
-        # Count most suitable height and width
-        aspect_ratio_sample_size = {key : [x / 512 * base_resolution for x in ASPECT_RATIO_512[key]] for key in ASPECT_RATIO_512.keys()}
+        # Find most suitable height and width
+        aspect_ratio_sample_size = {key : [x / 512 * base_resolution for x in ASPECT_RATIO_512[key]] for key in ASPECT_RATIO_512.keys()}

         closest_size, closest_ratio = get_closest_ratio(H, W, ratios=aspect_ratio_sample_size)
         height, width = [int(x / 16) * 16 for x in closest_size]
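For reference, the bucket selection above scales a table of (height, width) buckets defined at a 512 base up to base_resolution, picks the bucket whose aspect ratio is closest to the input image, and snaps the result to multiples of 16. A self-contained sketch of that flow; the three-entry table and the get_closest_ratio stand-in are illustrative assumptions, not the real definitions in cogvideox_fun.utils:

# Illustrative stand-in for ASPECT_RATIO_512: ratio key -> [height, width] at a 512 base.
ASPECT_RATIO_512 = {"0.56": [384, 672], "1.00": [512, 512], "1.78": [672, 384]}

def get_closest_ratio(h, w, ratios):
    # Assumed behaviour: pick the bucket whose ratio key is closest to h / w.
    target = h / w
    key = min(ratios, key=lambda k: abs(float(k) - target))
    return ratios[key], key

base_resolution = 768
H, W = 720, 1280  # e.g. one 720p input frame

aspect_ratio_sample_size = {key: [x / 512 * base_resolution for x in ASPECT_RATIO_512[key]]
                            for key in ASPECT_RATIO_512.keys()}
closest_size, closest_ratio = get_closest_ratio(H, W, ratios=aspect_ratio_sample_size)
height, width = [int(x / 16) * 16 for x in closest_size]
print(closest_ratio, height, width)  # "0.56", 576, 1008 with this toy table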
@@ -1,5 +1,7 @@
 # WORK IN PROGRESS

+Spreadsheet (WIP) of supported models and their supported features: https://docs.google.com/spreadsheets/d/16eA6mSL8XkTcu9fSWkPSHfRIqyAKJbR1O99xnuGdCKY/edit?usp=sharing
+
 ## BREAKING Update8

 This is big one, and unfortunately to do the necessary cleanup and refactoring this will break every old workflow as they are.