import os
import json

import folder_paths
import comfy.model_management as mm
from typing import Union


def patched_write_atomic(
    path_: str,
    content: Union[str, bytes],
    make_dirs: bool = False,
    encode_utf_8: bool = False,
) -> None:
    # Write into a temporary file first to avoid conflicts between threads
    # Avoid using a named temporary file, as those have restricted permissions
    from pathlib import Path
    import os
    import shutil
    import threading

    assert isinstance(
        content, (str, bytes)
    ), "Only strings and byte arrays can be saved in the cache"
    path = Path(path_)
    if make_dirs:
        path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = path.parent / f".{os.getpid()}.{threading.get_ident()}.tmp"
    write_mode = "w" if isinstance(content, str) else "wb"
    with tmp_path.open(write_mode, encoding="utf-8" if encode_utf_8 else None) as f:
        f.write(content)
    shutil.copy2(src=tmp_path, dst=path)  # changed to allow overwriting cache files
    os.remove(tmp_path)

try:
    import torch._inductor.codecache
    torch._inductor.codecache.write_atomic = patched_write_atomic
except Exception:  # torch inductor may be missing or its layout may change; the patch is optional
    pass
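
# A minimal sketch of what the patched writer does (hypothetical path, for
# illustration only):
#
#   patched_write_atomic("/tmp/inductor_cache/entry.py", "x = 1\n", make_dirs=True)
#
# It writes ".<pid>.<tid>.tmp" next to the target and then copies it over the
# destination, so concurrent readers never observe a partially written cache file.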

import torch
import torch.nn as nn

from diffusers.models import AutoencoderKLCogVideoX
from diffusers.schedulers import CogVideoXDDIMScheduler
from .custom_cogvideox_transformer_3d import CogVideoXTransformer3DModel
from .pipeline_cogvideox import CogVideoXPipeline
from contextlib import nullcontext

from accelerate import init_empty_weights
from accelerate.utils import set_module_tensor_to_device

from .utils import remove_specific_blocks, log
from comfy.utils import load_torch_file

script_directory = os.path.dirname(os.path.abspath(__file__))


class CogVideoLoraSelect:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "lora": (folder_paths.get_filename_list("cogvideox_loras"),
                         {"tooltip": "LoRA models are expected to be in ComfyUI/models/CogVideo/loras with a .safetensors extension"}),
                "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.0001, "tooltip": "LoRA strength, set to 0.0 to unmerge the LoRA"}),
            },
            "optional": {
                "prev_lora": ("COGLORA", {"default": None, "tooltip": "For loading multiple LoRAs"}),
                "fuse_lora": ("BOOLEAN", {"default": False, "tooltip": "Fuse the LoRA weights into the transformer"}),
            }
        }

    RETURN_TYPES = ("COGLORA",)
    RETURN_NAMES = ("lora",)
    FUNCTION = "getlorapath"
    CATEGORY = "CogVideoWrapper"

    def getlorapath(self, lora, strength, prev_lora=None, fuse_lora=False):
        cog_loras_list = []

        cog_lora = {
            "path": folder_paths.get_full_path("cogvideox_loras", lora),
            "strength": strength,
            "name": lora.split(".")[0],
            "fuse_lora": fuse_lora
        }
        if prev_lora is not None:
            cog_loras_list.extend(prev_lora)

        cog_loras_list.append(cog_lora)
        log.info(cog_loras_list)
        return (cog_loras_list,)
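
# A COGLORA value is a plain list of dicts, extended as nodes are chained.
# Illustrative shape (hypothetical path and values):
#
#   [{"path": ".../loras/orbit_left.safetensors", "strength": 0.8,
#     "name": "orbit_left", "fuse_lora": True}]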

#region DownloadAndLoadCogVideoModel
class DownloadAndLoadCogVideoModel:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": (
                    [
                        "THUDM/CogVideoX-2b",
                        "THUDM/CogVideoX-5b",
                        "THUDM/CogVideoX-5b-I2V",
                        "kijai/CogVideoX-5b-1.5-T2V",
                        "kijai/CogVideoX-5b-1.5-I2V",
                        "bertjiazheng/KoolCogVideoX-5b",
                        "kijai/CogVideoX-Fun-2b",
                        "kijai/CogVideoX-Fun-5b",
                        "kijai/CogVideoX-5b-Tora",
                        "alibaba-pai/CogVideoX-Fun-V1.1-2b-InP",
                        "alibaba-pai/CogVideoX-Fun-V1.1-5b-InP",
                        "alibaba-pai/CogVideoX-Fun-V1.1-2b-Pose",
                        "alibaba-pai/CogVideoX-Fun-V1.1-5b-Pose",
                        "feizhengcong/CogvideoX-Interpolation",
                        "NimVideo/cogvideox-2b-img2vid"
                    ],
                ),
            },
            "optional": {
                "precision": (["fp16", "fp32", "bf16"],
                    {"default": "bf16", "tooltip": "Official recommendation: fp16 for the 2b model, bf16 for the 5b model"}
                ),
                "quantization": (['disabled', 'fp8_e4m3fn', 'fp8_e4m3fn_fastmode', 'torchao_fp8dq', "torchao_fp8dqrow", "torchao_int8dq", "torchao_fp6"], {"default": 'disabled', "tooltip": "Casts the transformer to torch.float8_e4m3fn when enabled; fastmode is only for the latest NVIDIA GPUs and requires at least torch 2.4.0 with cu124"}),
                "enable_sequential_cpu_offload": ("BOOLEAN", {"default": False, "tooltip": "Significantly reduces memory usage but slows down inference"}),
                "block_edit": ("TRANSFORMERBLOCKS", {"default": None}),
                "lora": ("COGLORA", {"default": None}),
                "compile_args": ("COMPILEARGS", ),
                "attention_mode": (["sdpa", "sageattn", "fused_sdpa", "fused_sageattn", "comfy"], {"default": "sdpa"}),
                "load_device": (["main_device", "offload_device"], {"default": "main_device"}),
            }
        }

    RETURN_TYPES = ("COGVIDEOMODEL", "VAE",)
    RETURN_NAMES = ("model", "vae", )
    FUNCTION = "loadmodel"
    CATEGORY = "CogVideoWrapper"
    DESCRIPTION = "Downloads and loads the selected CogVideo model from Huggingface to 'ComfyUI/models/CogVideo'"

    def loadmodel(self, model, precision, quantization="disabled", compile="disabled",
                  enable_sequential_cpu_offload=False, block_edit=None, lora=None, compile_args=None,
                  attention_mode="sdpa", load_device="main_device"):

        if precision == "fp16" and "1.5" in model:
            raise ValueError("1.5 models do not currently work in fp16")

        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        manual_offloading = True
        transformer_load_device = device if load_device == "main_device" else offload_device
        mm.soft_empty_cache()

        dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[precision]
        download_path = folder_paths.get_folder_paths("CogVideo")[0]

        if "Fun" in model:
            if "1.1" not in model:
                repo_id = "kijai/CogVideoX-Fun-pruned"
                if "2b" in model:
                    base_path = os.path.join(folder_paths.models_dir, "CogVideoX_Fun", "CogVideoX-Fun-2b-InP")  # location of the official model
                    if not os.path.exists(base_path):
                        base_path = os.path.join(download_path, "CogVideoX-Fun-2b-InP")
                elif "5b" in model:
                    base_path = os.path.join(folder_paths.models_dir, "CogVideoX_Fun", "CogVideoX-Fun-5b-InP")  # location of the official model
                    if not os.path.exists(base_path):
                        base_path = os.path.join(download_path, "CogVideoX-Fun-5b-InP")
            else:
                repo_id = model
                base_path = os.path.join(folder_paths.models_dir, "CogVideoX_Fun", (model.split("/")[-1]))  # location of the official model
                if not os.path.exists(base_path):
                    base_path = os.path.join(download_path, (model.split("/")[-1]))
            download_path = base_path
            subfolder = "transformer"
            allow_patterns = ["*transformer*", "*scheduler*", "*vae*"]

        elif "2b" in model:
            if 'img2vid' in model:
                base_path = os.path.join(download_path, "cogvideox-2b-img2vid")
                download_path = base_path
                repo_id = model
            else:
                base_path = os.path.join(download_path, "CogVideo2B")
                download_path = base_path
                repo_id = model
            subfolder = "transformer"
            allow_patterns = ["*transformer*", "*scheduler*", "*vae*"]
        elif "1.5-T2V" in model or "1.5-I2V" in model:
            base_path = os.path.join(download_path, "CogVideoX-5b-1.5")
            download_path = base_path
            subfolder = "transformer_T2V" if "1.5-T2V" in model else "transformer_I2V"
            allow_patterns = [f"*{subfolder}*", "*vae*", "*scheduler*"]
            repo_id = "kijai/CogVideoX-5b-1.5"
        else:
            base_path = os.path.join(download_path, (model.split("/")[-1]))
            download_path = base_path
            repo_id = model
            subfolder = "transformer"
            allow_patterns = ["*transformer*", "*scheduler*", "*vae*"]

        if "2b" in model:
            scheduler_path = os.path.join(script_directory, 'configs', 'scheduler_config_2b.json')
        else:
            scheduler_path = os.path.join(script_directory, 'configs', 'scheduler_config_5b.json')

        if not os.path.exists(base_path) or not os.path.exists(os.path.join(base_path, subfolder)):
            log.info(f"Downloading model to: {base_path}")
            from huggingface_hub import snapshot_download

            snapshot_download(
                repo_id=repo_id,
                allow_patterns=allow_patterns,
                ignore_patterns=["*text_encoder*", "*tokenizer*"],
                local_dir=download_path,
                local_dir_use_symlinks=False,
            )
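
        # Resulting local layout (illustrative; exact folder names depend on the
        # selected model and the configured ComfyUI model paths):
        #   ComfyUI/models/CogVideo/<model name>/transformer/...
        #   ComfyUI/models/CogVideo/<model name>/vae/...
        #   ComfyUI/models/CogVideo/<model name>/scheduler/...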

        transformer = CogVideoXTransformer3DModel.from_pretrained(base_path, subfolder=subfolder)
        transformer = transformer.to(dtype).to(transformer_load_device)

        if "1.5" in model:
            transformer.config.sample_height = 300
            transformer.config.sample_width = 300

        if block_edit is not None:
            transformer = remove_specific_blocks(transformer, block_edit)

        with open(scheduler_path) as f:
            scheduler_config = json.load(f)
        scheduler = CogVideoXDDIMScheduler.from_config(scheduler_config)

        # VAE
        vae = AutoencoderKLCogVideoX.from_pretrained(base_path, subfolder="vae").to(dtype).to(offload_device)

        # pipeline
        pipe = CogVideoXPipeline(
            transformer,
            scheduler,
            dtype=dtype,
            is_fun_inpaint="fun" in model.lower() and "pose" not in model.lower()
        )
        if "cogvideox-2b-img2vid" in model:
            pipe.input_with_padding = False

        # LoRAs
        if lora is not None:
            try:
                adapter_list = []
                adapter_weights = []
                for l in lora:
                    fuse = True if l["fuse_lora"] else False
                    lora_sd = load_torch_file(l["path"])
                    for key, val in lora_sd.items():
                        if "lora_B" in key:
                            lora_rank = val.shape[1]
                            break
                    log.info(f"Merging rank {lora_rank} LoRA weights from {l['path']} with strength {l['strength']}")
                    adapter_name = l['path'].split("/")[-1].split(".")[0]
                    adapter_weight = l['strength']
                    pipe.load_lora_weights(l['path'], weight_name=l['path'].split("/")[-1], lora_rank=lora_rank, adapter_name=adapter_name)

                    #transformer = load_lora_into_transformer(lora, transformer)
                    adapter_list.append(adapter_name)
                    adapter_weights.append(adapter_weight)

                pipe.set_adapters(adapter_list, adapter_weights=adapter_weights)
                if fuse:
                    lora_scale = 1
                    dimension_loras = ["orbit", "dimensionx"]  # for now dimensionx loras need scaling
                    if any(item in lora[-1]["path"].lower() for item in dimension_loras):
                        lora_scale = lora_scale / lora_rank
                    pipe.fuse_lora(lora_scale=lora_scale, components=["transformer"])
            except Exception:  # Fun trainer LoRAs are loaded differently
                from .lora_utils import merge_lora
                for l in lora:
                    log.info(f"Merging LoRA weights from {l['path']} with strength {l['strength']}")
                    transformer = merge_lora(transformer, l["path"], l["strength"])
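
        # Note on the rank detection above: for a LoRA pair A (rank x in_features)
        # and B (out_features x rank), the "lora_B" weight's second dimension is
        # the rank, e.g. a lora_B tensor of shape (3072, 128) implies rank 128.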

        if "fused" in attention_mode:
            from diffusers.models.attention import Attention
            transformer.fuse_qkv_projections = True
            for module in transformer.modules():
                if isinstance(module, Attention):
                    module.fuse_projections(fuse=True)
        transformer.attention_mode = attention_mode

        if compile_args is not None:
            pipe.transformer.to(memory_format=torch.channels_last)

        # fp8
        if quantization == "fp8_e4m3fn" or quantization == "fp8_e4m3fn_fastmode":
            params_to_keep = {"patch_embed", "lora", "pos_embedding", "time_embedding", "norm_k", "norm_q", "to_k.bias", "to_q.bias", "to_v.bias"}
            if "1.5" in model:
                params_to_keep.update({"norm1.linear.weight", "ofs_embedding", "norm_final", "norm_out", "proj_out"})
            for name, param in pipe.transformer.named_parameters():
                if not any(keyword in name for keyword in params_to_keep):
                    param.data = param.data.to(torch.float8_e4m3fn)

            if quantization == "fp8_e4m3fn_fastmode":
                from .fp8_optimization import convert_fp8_linear
                if "1.5" in model:
                    params_to_keep.update({"ff"})  # otherwise NaNs
                convert_fp8_linear(pipe.transformer, dtype, params_to_keep=params_to_keep)
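
        # Illustrative check of the cast above: parameters matching params_to_keep
        # (e.g. "time_embedding") stay in the base dtype, everything else is now
        # torch.float8_e4m3fn:
        #
        #   {n: p.dtype for n, p in pipe.transformer.named_parameters()
        #    if "time_embedding" in n}   # -> base dtype, not float8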

        # compilation
        if compile_args is not None:
            torch._dynamo.config.cache_size_limit = compile_args["dynamo_cache_size_limit"]
            for i, block in enumerate(pipe.transformer.transformer_blocks):
                if "CogVideoXBlock" in str(block):
                    pipe.transformer.transformer_blocks[i] = torch.compile(block, fullgraph=compile_args["fullgraph"], dynamic=compile_args["dynamic"], backend=compile_args["backend"], mode=compile_args["mode"])
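
        # compile_args is expected to carry at least the keys used above, e.g.
        # (example values only):
        #   {"dynamo_cache_size_limit": 64, "fullgraph": False, "dynamic": False,
        #    "backend": "inductor", "mode": "default"}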

        if "torchao" in quantization:
            try:
                from torchao.quantization import (
                    quantize_,
                    fpx_weight_only,
                    float8_dynamic_activation_float8_weight,
                    int8_dynamic_activation_int8_weight
                )
            except ImportError:
                raise ImportError("torchao is not installed, please install torchao to use torchao quantization")

            def filter_fn(module: nn.Module, fqn: str) -> bool:
                target_submodules = {'attn1', 'ff'}  # avoid norm layers, 1.5 at least won't work with quantized norm1 #todo: test other models
                if any(sub in fqn for sub in target_submodules):
                    return isinstance(module, nn.Linear)
                return False

            if "fp6" in quantization:  # slower for some reason on 4090
                quant_func = fpx_weight_only(3, 2)
            elif 'fp8dqrow' in quantization:  # checked before fp8dq, which is a substring of fp8dqrow
                from torchao.quantization.quant_api import PerRow
                quant_func = float8_dynamic_activation_float8_weight(granularity=PerRow())
            elif "fp8dq" in quantization:  # very fast on 4090 when compiled
                quant_func = float8_dynamic_activation_float8_weight()
            elif 'int8dq' in quantization:
                quant_func = int8_dynamic_activation_int8_weight()

            for i, block in enumerate(pipe.transformer.transformer_blocks):
                if "CogVideoXBlock" in str(block):
                    quantize_(block, quant_func, filter_fn=filter_fn)

            manual_offloading = False  # to disable manual .to(device) calls

        if enable_sequential_cpu_offload:
            pipe.enable_sequential_cpu_offload()
            manual_offloading = False

        # Reference: structure of a 5b CogVideoXBlock
        # CogVideoXBlock(
        #   (norm1): CogVideoXLayerNormZero(
        #     (silu): SiLU()
        #     (linear): Linear(in_features=512, out_features=18432, bias=True)
        #     (norm): LayerNorm((3072,), eps=1e-05, elementwise_affine=True)
        #   )
        #   (attn1): Attention(
        #     (norm_q): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
        #     (norm_k): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
        #     (to_q): Linear(in_features=3072, out_features=3072, bias=True)
        #     (to_k): Linear(in_features=3072, out_features=3072, bias=True)
        #     (to_v): Linear(in_features=3072, out_features=3072, bias=True)
        #     (to_out): ModuleList(
        #       (0): Linear(in_features=3072, out_features=3072, bias=True)
        #       (1): Dropout(p=0.0, inplace=False)
        #     )
        #   )
        #   (norm2): CogVideoXLayerNormZero(
        #     (silu): SiLU()
        #     (linear): Linear(in_features=512, out_features=18432, bias=True)
        #     (norm): LayerNorm((3072,), eps=1e-05, elementwise_affine=True)
        #   )
        #   (ff): FeedForward(
        #     (net): ModuleList(
        #       (0): GELU(
        #         (proj): Linear(in_features=3072, out_features=12288, bias=True)
        #       )
        #       (1): Dropout(p=0.0, inplace=False)
        #       (2): Linear(in_features=12288, out_features=3072, bias=True)
        #       (3): Dropout(p=0.0, inplace=False)
        #     )
        #   )
        # )

        # if compile == "onediff":
        #     from onediffx import compile_pipe
        #     os.environ['NEXFORT_FX_FORCE_TRITON_SDPA'] = '1'
        #
        #     pipe = compile_pipe(
        #         pipe,
        #         backend="nexfort",
        #         options={"mode": "max-optimize:max-autotune:max-autotune", "memory_format": "channels_last", "options": {"inductor.optimize_linear_epilogue": False, "triton.fuse_attention_allow_fp16_reduction": False}},
        #         ignores=["vae"],
        #         fuse_qkv_projections=False,
        #     )

        pipeline = {
            "pipe": pipe,
            "dtype": dtype,
            "base_path": base_path,
            "onediff": compile == "onediff",
            "cpu_offloading": enable_sequential_cpu_offload,
            "manual_offloading": manual_offloading,
            "scheduler_config": scheduler_config,
            "model_name": model,
        }

        return (pipeline, vae)

#region GGUF
class DownloadAndLoadCogVideoGGUFModel:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": (
                    [
                        "CogVideoX_5b_GGUF_Q4_0.safetensors",
                        "CogVideoX_5b_I2V_GGUF_Q4_0.safetensors",
                        "CogVideoX_5b_1_5_I2V_GGUF_Q4_0.safetensors",
                        "CogVideoX_5b_fun_GGUF_Q4_0.safetensors",
                        "CogVideoX_5b_fun_1_1_GGUF_Q4_0.safetensors",
                        "CogVideoX_5b_fun_1_1_Pose_GGUF_Q4_0.safetensors",
                        "CogVideoX_5b_Interpolation_GGUF_Q4_0.safetensors",
                        "CogVideoX_5b_Tora_GGUF_Q4_0.safetensors",
                    ],
                ),
                "vae_precision": (["fp16", "fp32", "bf16"], {"default": "bf16", "tooltip": "VAE dtype"}),
                "fp8_fastmode": ("BOOLEAN", {"default": False, "tooltip": "Only supported on 4090 and later GPUs, also requires at least torch 2.4.0 with cu124"}),
                "load_device": (["main_device", "offload_device"], {"default": "main_device"}),
                "enable_sequential_cpu_offload": ("BOOLEAN", {"default": False, "tooltip": "Significantly reduces memory usage but slows down inference"}),
            },
            "optional": {
                "block_edit": ("TRANSFORMERBLOCKS", {"default": None}),
                #"lora": ("COGLORA", {"default": None}),
                "compile": (["disabled", "torch"], {"tooltip": "Compile the model for faster inference; these are advanced options only available on Linux, see the readme for more info"}),
                "attention_mode": (["sdpa", "sageattn"], {"default": "sdpa"}),
            }
        }

    RETURN_TYPES = ("COGVIDEOMODEL", "VAE",)
    RETURN_NAMES = ("model", "vae",)
    FUNCTION = "loadmodel"
    CATEGORY = "CogVideoWrapper"

    def loadmodel(self, model, vae_precision, fp8_fastmode, load_device, enable_sequential_cpu_offload,
                  block_edit=None, compile="disabled", attention_mode="sdpa"):

        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        mm.soft_empty_cache()

        vae_dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[vae_precision]
        download_path = os.path.join(folder_paths.models_dir, 'CogVideo', 'GGUF')
        gguf_path = os.path.join(folder_paths.models_dir, 'diffusion_models', model)  # check MinusZone's model path first
        if not os.path.exists(gguf_path):
            gguf_path = os.path.join(download_path, model)
            if not os.path.exists(gguf_path):
                if "I2V" in model or "1_1" in model or "Interpolation" in model or "Tora" in model:
                    repo_id = "Kijai/CogVideoX_GGUF"
                else:
                    repo_id = "MinusZoneAI/ComfyUI-CogVideoX-MZ"
                log.info(f"Downloading model to: {gguf_path}")
                from huggingface_hub import snapshot_download

                snapshot_download(
                    repo_id=repo_id,
                    allow_patterns=[f"*{model}*"],
                    local_dir=download_path,
                    local_dir_use_symlinks=False,
                )

        if "5b" in model:
            scheduler_path = os.path.join(script_directory, 'configs', 'scheduler_config_5b.json')
            transformer_path = os.path.join(script_directory, 'configs', 'transformer_config_5b.json')
        elif "2b" in model:
            scheduler_path = os.path.join(script_directory, 'configs', 'scheduler_config_2b.json')
            transformer_path = os.path.join(script_directory, 'configs', 'transformer_config_2b.json')

        with open(transformer_path) as f:
            transformer_config = json.load(f)

        from . import mz_gguf_loader
        import importlib
        importlib.reload(mz_gguf_loader)

        with mz_gguf_loader.quantize_lazy_load():
            if "fun" in model:
                if "Pose" in model:
                    transformer_config["in_channels"] = 32
                else:
                    transformer_config["in_channels"] = 33
            elif "I2V" in model or "Interpolation" in model:
                transformer_config["in_channels"] = 32
                if "1_5" in model:
                    transformer_config["ofs_embed_dim"] = 512
                    transformer_config["use_learned_positional_embeddings"] = False
                    transformer_config["patch_size_t"] = 2
                    transformer_config["patch_bias"] = False
                    transformer_config["sample_height"] = 300
                    transformer_config["sample_width"] = 300
            else:
                transformer_config["in_channels"] = 16

            transformer = CogVideoXTransformer3DModel.from_config(transformer_config)

            params_to_keep = {"patch_embed", "pos_embedding", "time_embedding"}
            cast_dtype = torch.bfloat16  # default for 5b models; overridden below
            if "2b" in model:
                cast_dtype = torch.float16
            elif "1_5" in model:
                params_to_keep = {"norm1.linear.weight", "patch_embed", "time_embedding", "ofs_embedding", "norm_final", "norm_out", "proj_out"}
                cast_dtype = torch.bfloat16
            for name, param in transformer.named_parameters():
                if not any(keyword in name for keyword in params_to_keep):
                    param.data = param.data.to(torch.float8_e4m3fn)
                else:
                    param.data = param.data.to(cast_dtype)
            #for name, param in transformer.named_parameters():
            #    print(name, param.data.dtype)

        if block_edit is not None:
            transformer = remove_specific_blocks(transformer, block_edit)

        transformer.attention_mode = attention_mode

        if fp8_fastmode:
            params_to_keep = {"patch_embed", "lora", "pos_embedding", "time_embedding"}
            if "1_5" in model:  # GGUF filenames use underscores, e.g. "..._1_5_..."
                params_to_keep.update({"ff", "norm1.linear.weight", "norm_k", "norm_q", "ofs_embedding", "norm_final", "norm_out", "proj_out"})
            from .fp8_optimization import convert_fp8_linear
            convert_fp8_linear(transformer, vae_dtype, params_to_keep=params_to_keep)

        if compile == "torch":
            # compilation
            for i, block in enumerate(transformer.transformer_blocks):
                transformer.transformer_blocks[i] = torch.compile(block, fullgraph=False, dynamic=False, backend="inductor")

        with open(scheduler_path) as f:
            scheduler_config = json.load(f)

        scheduler = CogVideoXDDIMScheduler.from_config(scheduler_config, subfolder="scheduler")

        # VAE
        vae_dl_path = os.path.join(folder_paths.models_dir, 'CogVideo', 'VAE')
        vae_path = os.path.join(vae_dl_path, "cogvideox_vae.safetensors")
        if not os.path.exists(vae_path):
            log.info(f"Downloading VAE model to: {vae_path}")
            from huggingface_hub import snapshot_download

            snapshot_download(
                repo_id="Kijai/CogVideoX-Fun-pruned",
                allow_patterns=["*cogvideox_vae.safetensors*"],
                local_dir=vae_dl_path,
                local_dir_use_symlinks=False,
            )
        with open(os.path.join(script_directory, 'configs', 'vae_config.json')) as f:
            vae_config = json.load(f)

        vae_sd = load_torch_file(vae_path)
        vae = AutoencoderKLCogVideoX.from_config(vae_config).to(vae_dtype).to(offload_device)
        vae.load_state_dict(vae_sd)
        del vae_sd
        pipe = CogVideoXPipeline(transformer, scheduler, dtype=vae_dtype)

        if enable_sequential_cpu_offload:
            pipe.enable_sequential_cpu_offload()

        sd = load_torch_file(gguf_path)
        pipe.transformer = mz_gguf_loader.quantize_load_state_dict(pipe.transformer, sd, device="cpu")
        del sd

        if load_device == "offload_device":
            pipe.transformer.to(offload_device)
        else:
            pipe.transformer.to(device)

        pipeline = {
            "pipe": pipe,
            "dtype": vae_dtype,
            "base_path": model,
            "onediff": False,
            "cpu_offloading": enable_sequential_cpu_offload,
            "scheduler_config": scheduler_config,
            "model_name": model,
            "manual_offloading": True,
        }

        return (pipeline, vae)

#region ModelLoader
class CogVideoXModelLoader:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": (folder_paths.get_filename_list("diffusion_models"), {"tooltip": "The name of the checkpoint (model) to load."}),
                "base_precision": (["fp16", "fp32", "bf16"], {"default": "bf16"}),
                "quantization": (['disabled', 'fp8_e4m3fn', 'fp8_e4m3fn_fast', 'torchao_fp8dq', "torchao_fp8dqrow", "torchao_int8dq", "torchao_fp6"], {"default": 'disabled', "tooltip": "Optional quantization method"}),
                "load_device": (["main_device", "offload_device"], {"default": "main_device"}),
                "enable_sequential_cpu_offload": ("BOOLEAN", {"default": False, "tooltip": "Significantly reduces memory usage but slows down inference"}),
            },
            "optional": {
                "block_edit": ("TRANSFORMERBLOCKS", {"default": None}),
                "lora": ("COGLORA", {"default": None}),
                "compile_args": ("COMPILEARGS", ),
                "attention_mode": (["sdpa", "sageattn", "fused_sdpa", "fused_sageattn"], {"default": "sdpa"}),
            }
        }

    RETURN_TYPES = ("COGVIDEOMODEL",)
    RETURN_NAMES = ("model", )
    FUNCTION = "loadmodel"
    CATEGORY = "CogVideoWrapper"

    def loadmodel(self, model, base_precision, load_device, enable_sequential_cpu_offload,
                  block_edit=None, compile_args=None, lora=None, attention_mode="sdpa", quantization="disabled"):

        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        manual_offloading = True
        transformer_load_device = device if load_device == "main_device" else offload_device
        mm.soft_empty_cache()

        base_dtype = {"fp8_e4m3fn": torch.float8_e4m3fn, "fp8_e4m3fn_fast": torch.float8_e4m3fn, "bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[base_precision]

        model_path = folder_paths.get_full_path_or_raise("diffusion_models", model)
        sd = load_torch_file(model_path, device=transformer_load_device)

        model_type = ""
        if sd["patch_embed.proj.weight"].shape == (3072, 33, 2, 2):
            model_type = "fun_5b"
        elif sd["patch_embed.proj.weight"].shape == (3072, 16, 2, 2):
            model_type = "5b"
        elif sd["patch_embed.proj.weight"].shape == (3072, 128):
            model_type = "5b_1_5"
        elif sd["patch_embed.proj.weight"].shape == (3072, 256):
            model_type = "5b_I2V_1_5"
        elif sd["patch_embed.proj.weight"].shape == (1920, 33, 2, 2):
            model_type = "fun_2b"
        elif sd["patch_embed.proj.weight"].shape == (1920, 16, 2, 2):
            model_type = "2b"
        elif sd["patch_embed.proj.weight"].shape == (3072, 32, 2, 2):
            if "pos_embedding" in sd:
                model_type = "fun_5b_pose"
            else:
                model_type = "I2V_5b"
        else:
            raise Exception("Selected model is not recognized")
        log.info(f"Detected CogVideoX model type: {model_type}")

        if "5b" in model_type:
            scheduler_config_path = os.path.join(script_directory, 'configs', 'scheduler_config_5b.json')
            transformer_config_path = os.path.join(script_directory, 'configs', 'transformer_config_5b.json')
        elif "2b" in model_type:
            scheduler_config_path = os.path.join(script_directory, 'configs', 'scheduler_config_2b.json')
            transformer_config_path = os.path.join(script_directory, 'configs', 'transformer_config_2b.json')

        with open(transformer_config_path) as f:
            transformer_config = json.load(f)

        if model_type in ["I2V", "I2V_5b", "fun_5b_pose", "5b_I2V_1_5"]:
            transformer_config["in_channels"] = 32
            if "1_5" in model_type:
                transformer_config["ofs_embed_dim"] = 512
        elif "fun" in model_type:
            transformer_config["in_channels"] = 33
        else:
            transformer_config["in_channels"] = 16
        if "1_5" in model_type:
            transformer_config["use_learned_positional_embeddings"] = False
            transformer_config["patch_size_t"] = 2
            transformer_config["patch_bias"] = False
            transformer_config["sample_height"] = 300
            transformer_config["sample_width"] = 300

        with init_empty_weights():
            transformer = CogVideoXTransformer3DModel.from_config(transformer_config)

        # load weights
        #params_to_keep = {}
        log.info("Using accelerate to load and assign model weights to device...")

        for name, param in transformer.named_parameters():
            #dtype_to_use = base_dtype if any(keyword in name for keyword in params_to_keep) else dtype
            set_module_tensor_to_device(transformer, name, device=transformer_load_device, dtype=base_dtype, value=sd[name])
        del sd

        # scheduler
        with open(scheduler_config_path) as f:
            scheduler_config = json.load(f)
        scheduler = CogVideoXDDIMScheduler.from_config(scheduler_config, subfolder="scheduler")

        if block_edit is not None:
            transformer = remove_specific_blocks(transformer, block_edit)

        if "fused" in attention_mode:
            from diffusers.models.attention import Attention
            transformer.fuse_qkv_projections = True
            for module in transformer.modules():
                if isinstance(module, Attention):
                    module.fuse_projections(fuse=True)
        transformer.attention_mode = attention_mode

        if "fun" in model_type:
            if not "pose" in model_type:
                raise NotImplementedError("Fun models besides pose are not supported with this loader yet")
                # dead code left from a planned Fun inpaint path; CogVideoX_Fun_Pipeline_Inpaint
                # is not imported here and no VAE is constructed in this loader
                #pipe = CogVideoX_Fun_Pipeline_Inpaint(vae, transformer, scheduler)
            else:
                pipe = CogVideoXPipeline(transformer, scheduler, dtype=base_dtype)
        else:
            pipe = CogVideoXPipeline(transformer, scheduler, dtype=base_dtype)

        if enable_sequential_cpu_offload:
            pipe.enable_sequential_cpu_offload()

        # LoRAs
        if lora is not None:
            from .lora_utils import merge_lora  #, load_lora_into_transformer
            if "fun" in model.lower():
                for l in lora:
                    log.info(f"Merging LoRA weights from {l['path']} with strength {l['strength']}")
                    transformer = merge_lora(transformer, l["path"], l["strength"])
            else:
                adapter_list = []
                adapter_weights = []
                for l in lora:
                    fuse = True if l["fuse_lora"] else False
                    lora_sd = load_torch_file(l["path"])
                    for key, val in lora_sd.items():
                        if "lora_B" in key:
                            lora_rank = val.shape[1]
                            break
                    log.info(f"Merging rank {lora_rank} LoRA weights from {l['path']} with strength {l['strength']}")
                    adapter_name = l['path'].split("/")[-1].split(".")[0]
                    adapter_weight = l['strength']
                    pipe.load_lora_weights(l['path'], weight_name=l['path'].split("/")[-1], lora_rank=lora_rank, adapter_name=adapter_name)

                    #transformer = load_lora_into_transformer(lora, transformer)
                    adapter_list.append(adapter_name)
                    adapter_weights.append(adapter_weight)

                pipe.set_adapters(adapter_list, adapter_weights=adapter_weights)
                if fuse:
                    lora_scale = 1
                    dimension_loras = ["orbit", "dimensionx"]  # for now dimensionx loras need scaling
                    if any(item in lora[-1]["path"].lower() for item in dimension_loras):
                        lora_scale = lora_scale / lora_rank
                    pipe.fuse_lora(lora_scale=lora_scale, components=["transformer"])

        if compile_args is not None:
            pipe.transformer.to(memory_format=torch.channels_last)

        # quantization
        if quantization == "fp8_e4m3fn" or quantization == "fp8_e4m3fn_fast":
            params_to_keep = {"patch_embed", "lora", "pos_embedding", "time_embedding", "norm_k", "norm_q", "to_k.bias", "to_q.bias", "to_v.bias"}
            if "1_5" in model_type:  # filenames are unreliable here, so check the detected model type
                params_to_keep.update({"norm1.linear.weight", "ofs_embedding", "norm_final", "norm_out", "proj_out"})
            for name, param in pipe.transformer.named_parameters():
                if not any(keyword in name for keyword in params_to_keep):
                    param.data = param.data.to(torch.float8_e4m3fn)

            if quantization == "fp8_e4m3fn_fast":
                from .fp8_optimization import convert_fp8_linear
                if "1_5" in model_type:
                    params_to_keep.update({"ff"})  # otherwise NaNs
                convert_fp8_linear(pipe.transformer, base_dtype, params_to_keep=params_to_keep)

        # compile
        if compile_args is not None:
            torch._dynamo.config.cache_size_limit = compile_args["dynamo_cache_size_limit"]
            for i, block in enumerate(pipe.transformer.transformer_blocks):
                if "CogVideoXBlock" in str(block):
                    pipe.transformer.transformer_blocks[i] = torch.compile(block, fullgraph=compile_args["fullgraph"], dynamic=compile_args["dynamic"], backend=compile_args["backend"], mode=compile_args["mode"])

        if "torchao" in quantization:
            try:
                from torchao.quantization import (
                    quantize_,
                    fpx_weight_only,
                    float8_dynamic_activation_float8_weight,
                    int8_dynamic_activation_int8_weight
                )
            except ImportError:
                raise ImportError("torchao is not installed, please install torchao to use torchao quantization")

            def filter_fn(module: nn.Module, fqn: str) -> bool:
                target_submodules = {'attn1', 'ff'}  # avoid norm layers, 1.5 at least won't work with quantized norm1 #todo: test other models
                if any(sub in fqn for sub in target_submodules):
                    return isinstance(module, nn.Linear)
                return False

            if "fp6" in quantization:  # slower for some reason on 4090
                quant_func = fpx_weight_only(3, 2)
            elif 'fp8dqrow' in quantization:  # checked before fp8dq, which is a substring of fp8dqrow
                from torchao.quantization.quant_api import PerRow
                quant_func = float8_dynamic_activation_float8_weight(granularity=PerRow())
            elif "fp8dq" in quantization:  # very fast on 4090 when compiled
                quant_func = float8_dynamic_activation_float8_weight()
            elif 'int8dq' in quantization:
                quant_func = int8_dynamic_activation_int8_weight()

            for i, block in enumerate(pipe.transformer.transformer_blocks):
                if "CogVideoXBlock" in str(block):
                    quantize_(block, quant_func, filter_fn=filter_fn)

            manual_offloading = False  # to disable manual .to(device) calls
            log.info(f"Quantized transformer blocks to {quantization}")

        # if load_device == "offload_device":
        #     pipe.transformer.to(offload_device)
        # else:
        #     pipe.transformer.to(device)

        pipeline = {
            "pipe": pipe,
            "dtype": base_dtype,
            "base_path": model,
            "onediff": False,
            "cpu_offloading": enable_sequential_cpu_offload,
            "scheduler_config": scheduler_config,
            "model_name": model,
            "manual_offloading": manual_offloading,
        }

        return (pipeline,)

#region VAE
class CogVideoXVAELoader:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model_name": (folder_paths.get_filename_list("vae"), {"tooltip": "The name of the checkpoint (vae) to load."}),
            },
            "optional": {
                "precision": (["fp16", "fp32", "bf16"],
                    {"default": "bf16"}
                ),
            }
        }

    RETURN_TYPES = ("VAE",)
    RETURN_NAMES = ("vae", )
    FUNCTION = "loadmodel"
    CATEGORY = "CogVideoWrapper"
    DESCRIPTION = "Loads CogVideoX VAE model from 'ComfyUI/models/vae'"

    def loadmodel(self, model_name, precision="bf16"):
        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()

        dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[precision]
        with open(os.path.join(script_directory, 'configs', 'vae_config.json')) as f:
            vae_config = json.load(f)
        model_path = folder_paths.get_full_path("vae", model_name)
        vae_sd = load_torch_file(model_path)

        vae = AutoencoderKLCogVideoX.from_config(vae_config).to(dtype).to(offload_device)
        vae.load_state_dict(vae_sd)
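
        # load_state_dict defaults to strict=True, so a key or shape mismatch
        # (i.e. a non-CogVideoX VAE file) fails loudly here instead of producing
        # a silently broken VAE.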

        return (vae,)

#region Tora
class DownloadAndLoadToraModel:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": (
                    [
                        "kijai/CogVideoX-5b-Tora",
                    ],
                ),
            },
        }

    RETURN_TYPES = ("TORAMODEL",)
    RETURN_NAMES = ("tora_model", )
    FUNCTION = "loadmodel"
    CATEGORY = "CogVideoWrapper"
    DESCRIPTION = "Downloads and loads the Tora model from Huggingface to 'ComfyUI/models/CogVideo/CogVideoX-5b-Tora'"

    def loadmodel(self, model):
        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        mm.soft_empty_cache()

        from .tora.traj_module import MGF

        try:
            from accelerate import init_empty_weights
            from accelerate.utils import set_module_tensor_to_device
            is_accelerate_available = True
        except ImportError:
            is_accelerate_available = False

        download_path = os.path.join(folder_paths.models_dir, 'CogVideo', "CogVideoX-5b-Tora")
        fuser_path = os.path.join(download_path, "fuser", "fuser.safetensors")
        if not os.path.exists(fuser_path):
            log.info(f"Downloading Fuser model to: {fuser_path}")
            from huggingface_hub import snapshot_download

            snapshot_download(
                repo_id=model,
                allow_patterns=["*fuser.safetensors*"],
                local_dir=download_path,
                local_dir_use_symlinks=False,
            )

        hidden_size = 3072
        num_layers = 42

        with (init_empty_weights() if is_accelerate_available else nullcontext()):
            fuser_list = nn.ModuleList([MGF(128, hidden_size) for _ in range(num_layers)])
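
        # One MGF fuser per transformer block: num_layers=42 and hidden_size=3072
        # match the 5b transformer, and the 128-channel input matches the
        # per-block output channels of the trajectory extractor below.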

        fuser_sd = load_torch_file(fuser_path)
        if is_accelerate_available:
            for key in fuser_sd:
                set_module_tensor_to_device(fuser_list, key, dtype=torch.float16, device=device, value=fuser_sd[key])
        else:
            fuser_list.load_state_dict(fuser_sd)
            for module in fuser_list:
                for param in module.parameters():
                    param.data = param.data.to(torch.bfloat16).to(device)
        del fuser_sd

        traj_extractor_path = os.path.join(download_path, "traj_extractor", "traj_extractor.safetensors")
        if not os.path.exists(traj_extractor_path):
            log.info(f"Downloading trajectory extractor model to: {traj_extractor_path}")
            from huggingface_hub import snapshot_download

            snapshot_download(
                repo_id="kijai/CogVideoX-5b-Tora",
                allow_patterns=["*traj_extractor.safetensors*"],
                local_dir=download_path,
                local_dir_use_symlinks=False,
            )

        from .tora.traj_module import TrajExtractor
        with (init_empty_weights() if is_accelerate_available else nullcontext()):
            traj_extractor = TrajExtractor(
                vae_downsize=(4, 8, 8),
                patch_size=2,
                nums_rb=2,
                cin=16,
                channels=[128] * 42,
                sk=True,
                use_conv=False,
            )

        traj_sd = load_torch_file(traj_extractor_path)
        if is_accelerate_available:
            for key in traj_sd:
                set_module_tensor_to_device(traj_extractor, key, dtype=torch.float32, device=device, value=traj_sd[key])
        else:
            traj_extractor.load_state_dict(traj_sd)
        traj_extractor.to(torch.float32).to(device)

        toramodel = {
            "fuser_list": fuser_list,
            "traj_extractor": traj_extractor,
        }

        return (toramodel,)

#region controlnet
class DownloadAndLoadCogVideoControlNet:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": (
                    [
                        "TheDenk/cogvideox-2b-controlnet-hed-v1",
                        "TheDenk/cogvideox-2b-controlnet-canny-v1",
                        "TheDenk/cogvideox-5b-controlnet-hed-v1",
                        "TheDenk/cogvideox-5b-controlnet-canny-v1"
                    ],
                ),
            },
        }

    RETURN_TYPES = ("COGVIDECONTROLNETMODEL",)
    RETURN_NAMES = ("cogvideo_controlnet", )
    FUNCTION = "loadmodel"
    CATEGORY = "CogVideoWrapper"

    def loadmodel(self, model):
        from .cogvideo_controlnet import CogVideoXControlnet

        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        mm.soft_empty_cache()

        download_path = os.path.join(folder_paths.models_dir, 'CogVideo', 'ControlNet')
        base_path = os.path.join(download_path, (model.split("/")[-1]))

        if not os.path.exists(base_path):
            log.info(f"Downloading model to: {base_path}")
            from huggingface_hub import snapshot_download

            snapshot_download(
                repo_id=model,
                ignore_patterns=["*text_encoder*", "*tokenizer*"],
                local_dir=base_path,
                local_dir_use_symlinks=False,
            )

        controlnet = CogVideoXControlnet.from_pretrained(base_path)

        return (controlnet,)

NODE_CLASS_MAPPINGS = {
    "DownloadAndLoadCogVideoModel": DownloadAndLoadCogVideoModel,
    "DownloadAndLoadCogVideoGGUFModel": DownloadAndLoadCogVideoGGUFModel,
    "DownloadAndLoadCogVideoControlNet": DownloadAndLoadCogVideoControlNet,
    "DownloadAndLoadToraModel": DownloadAndLoadToraModel,
    "CogVideoLoraSelect": CogVideoLoraSelect,
    "CogVideoXVAELoader": CogVideoXVAELoader,
    "CogVideoXModelLoader": CogVideoXModelLoader,
}
NODE_DISPLAY_NAME_MAPPINGS = {
    "DownloadAndLoadCogVideoModel": "(Down)load CogVideo Model",
    "DownloadAndLoadCogVideoGGUFModel": "(Down)load CogVideo GGUF Model",
    "DownloadAndLoadCogVideoControlNet": "(Down)load CogVideo ControlNet",
    "DownloadAndLoadToraModel": "(Down)load Tora Model",
    "CogVideoLoraSelect": "CogVideo LoraSelect",
    "CogVideoXVAELoader": "CogVideoX VAE Loader",
    "CogVideoXModelLoader": "CogVideoX Model Loader",
}