From 21675b296b761ad9fe8f8be1b02606110fb69399 Mon Sep 17 00:00:00 2001 From: kijai <40791699+kijai@users.noreply.github.com> Date: Sun, 22 Sep 2024 16:07:32 +0300 Subject: [PATCH 1/4] Add 5b I2V GGUF https://huggingface.co/Kijai/CogVideoX_GGUF/blob/main/CogVideoX_5b_I2V_GGUF_Q4_0.safetensors --- cogvideox_fun/pipeline_cogvideox_inpaint.py | 1 - configs/transformer_config_I2V_5b.json | 2 -- nodes.py | 12 +++++++++++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/cogvideox_fun/pipeline_cogvideox_inpaint.py b/cogvideox_fun/pipeline_cogvideox_inpaint.py index f372342..7437d03 100644 --- a/cogvideox_fun/pipeline_cogvideox_inpaint.py +++ b/cogvideox_fun/pipeline_cogvideox_inpaint.py @@ -21,7 +21,6 @@ from typing import Callable, Dict, List, Optional, Tuple, Union import torch import torch.nn.functional as F from einops import rearrange -from transformers import T5EncoderModel, T5Tokenizer from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel diff --git a/configs/transformer_config_I2V_5b.json b/configs/transformer_config_I2V_5b.json index 3265e76..420eb9f 100644 --- a/configs/transformer_config_I2V_5b.json +++ b/configs/transformer_config_I2V_5b.json @@ -1,6 +1,4 @@ { - "_class_name": "CogVideoXTransformer3DModel", - "_diffusers_version": "0.31.0.dev0", "activation_fn": "gelu-approximate", "attention_bias": true, "attention_head_dim": 64, diff --git a/nodes.py b/nodes.py index 4577310..5be183e 100644 --- a/nodes.py +++ b/nodes.py @@ -173,6 +173,7 @@ class DownloadAndLoadCogVideoGGUFModel: "model": ( [ "CogVideoX_5b_GGUF_Q4_0.safetensors", + "CogVideoX_5b_I2V_GGUF_Q4_0.safetensors", "CogVideoX_5b_fun_GGUF_Q4_0.safetensors", ], ), @@ -198,11 +199,15 @@ class DownloadAndLoadCogVideoGGUFModel: if not os.path.exists(gguf_path): gguf_path = os.path.join(download_path, model) if not os.path.exists(gguf_path): + if "I2V" in model: + repo_id = "Kijai/CogVideoX_GGUF" + else: + repo_id = "MinusZoneAI/ComfyUI-CogVideoX-MZ" log.info(f"Downloading model to: {gguf_path}") from huggingface_hub import snapshot_download snapshot_download( - repo_id="MinusZoneAI/ComfyUI-CogVideoX-MZ", + repo_id=repo_id, allow_patterns=[f"*{model}*"], local_dir=download_path, local_dir_use_symlinks=False, @@ -212,6 +217,8 @@ class DownloadAndLoadCogVideoGGUFModel: with open(os.path.join(script_directory, 'configs', 'transformer_config_5b.json')) as f: transformer_config = json.load(f) sd = load_torch_file(gguf_path) + for key, value in sd.items(): + print(key, value.shape, value.dtype) from . import mz_gguf_loader import importlib @@ -221,6 +228,9 @@ class DownloadAndLoadCogVideoGGUFModel: if "fun" in model: transformer_config["in_channels"] = 33 transformer = CogVideoXTransformer3DModelFun.from_config(transformer_config) + elif "I2V" in model: + transformer_config["in_channels"] = 32 + transformer = CogVideoXTransformer3DModel.from_config(transformer_config) else: transformer_config["in_channels"] = 16 transformer = CogVideoXTransformer3DModel.from_config(transformer_config) From 2a71aba1aa77eeec565080b3a31d4e512ebc4936 Mon Sep 17 00:00:00 2001 From: kijai <40791699+kijai@users.noreply.github.com> Date: Sun, 22 Sep 2024 16:09:00 +0300 Subject: [PATCH 2/4] remove print --- nodes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nodes.py b/nodes.py index 5be183e..8b4dc9a 100644 --- a/nodes.py +++ b/nodes.py @@ -217,8 +217,8 @@ class DownloadAndLoadCogVideoGGUFModel: with open(os.path.join(script_directory, 'configs', 'transformer_config_5b.json')) as f: transformer_config = json.load(f) sd = load_torch_file(gguf_path) - for key, value in sd.items(): - print(key, value.shape, value.dtype) + # for key, value in sd.items(): + # print(key, value.shape, value.dtype) from . import mz_gguf_loader import importlib From d3d7f043cd5438807bafb90c3fd4b9a00a839082 Mon Sep 17 00:00:00 2001 From: kijai <40791699+kijai@users.noreply.github.com> Date: Sun, 22 Sep 2024 17:03:28 +0300 Subject: [PATCH 3/4] Allow sequential_cpu_offload for GGUF too --- configs/transformer_config_2b.json | 2 - nodes.py | 67 +++++++++++++++++++----------- 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/configs/transformer_config_2b.json b/configs/transformer_config_2b.json index 7336f77..7389864 100644 --- a/configs/transformer_config_2b.json +++ b/configs/transformer_config_2b.json @@ -1,6 +1,4 @@ { - "_class_name": "CogVideoXTransformer3DModel", - "_diffusers_version": "0.30.0.dev0", "activation_fn": "gelu-approximate", "attention_bias": true, "attention_head_dim": 64, diff --git a/nodes.py b/nodes.py index 8b4dc9a..882c7b6 100644 --- a/nodes.py +++ b/nodes.py @@ -175,11 +175,13 @@ class DownloadAndLoadCogVideoGGUFModel: "CogVideoX_5b_GGUF_Q4_0.safetensors", "CogVideoX_5b_I2V_GGUF_Q4_0.safetensors", "CogVideoX_5b_fun_GGUF_Q4_0.safetensors", + #"CogVideoX_2b_fun_GGUF_Q4_0.safetensors" ], ), "vae_precision": (["fp16", "fp32", "bf16"], {"default": "bf16", "tooltip": "VAE dtype"}), "fp8_fastmode": ("BOOLEAN", {"default": False, "tooltip": "only supported on 4090 and later GPUs"}), "load_device": (["main_device", "offload_device"], {"default": "main_device"}), + "enable_sequential_cpu_offload": ("BOOLEAN", {"default": False, "tooltip": "significantly reducing memory usage and slows down the inference"}), }, } @@ -188,7 +190,7 @@ class DownloadAndLoadCogVideoGGUFModel: FUNCTION = "loadmodel" CATEGORY = "CogVideoWrapper" - def loadmodel(self, model, vae_precision, fp8_fastmode, load_device): + def loadmodel(self, model, vae_precision, fp8_fastmode, load_device, enable_sequential_cpu_offload): device = mm.get_torch_device() offload_device = mm.unet_offload_device() mm.soft_empty_cache() @@ -213,17 +215,24 @@ class DownloadAndLoadCogVideoGGUFModel: local_dir_use_symlinks=False, ) - - with open(os.path.join(script_directory, 'configs', 'transformer_config_5b.json')) as f: - transformer_config = json.load(f) + if "5b" in model: + scheduler_path = os.path.join(script_directory, 'configs', 'scheduler_config_5b.json') + transformer_path = os.path.join(script_directory, 'configs', 'transformer_config_5b.json') + elif "2b" in model: + scheduler_path = os.path.join(script_directory, 'configs', 'scheduler_config_2b.json') + transformer_path = os.path.join(script_directory, 'configs', 'transformer_config_2b.json') + + with open(transformer_path) as f: + transformer_config = json.load(f) + sd = load_torch_file(gguf_path) - # for key, value in sd.items(): - # print(key, value.shape, value.dtype) + #for key, value in sd.items(): + # print(key, value.shape, value.dtype) from . import mz_gguf_loader import importlib importlib.reload(mz_gguf_loader) - + with mz_gguf_loader.quantize_lazy_load(): if "fun" in model: transformer_config["in_channels"] = 33 @@ -235,7 +244,14 @@ class DownloadAndLoadCogVideoGGUFModel: transformer_config["in_channels"] = 16 transformer = CogVideoXTransformer3DModel.from_config(transformer_config) - transformer.to(torch.float8_e4m3fn) + if "2b" in model: + for name, param in transformer.named_parameters(): + if name != "pos_embedding": + param.data = param.data.to(torch.float8_e4m3fn) + else: + param.data = param.data.to(torch.float16) + else: + transformer.to(torch.float8_e4m3fn) transformer = mz_gguf_loader.quantize_load_state_dict(transformer, sd, device="cpu") if load_device == "offload_device": transformer.to(offload_device) @@ -246,7 +262,7 @@ class DownloadAndLoadCogVideoGGUFModel: from .fp8_optimization import convert_fp8_linear convert_fp8_linear(transformer, vae_dtype) - scheduler_path = os.path.join(script_directory, 'configs', 'scheduler_config_5b.json') + with open(scheduler_path) as f: scheduler_config = json.load(f) @@ -279,28 +295,31 @@ class DownloadAndLoadCogVideoGGUFModel: pipe = CogVideoXPipeline(vae, transformer, scheduler) # compilation - if compile == "torch": - torch._dynamo.config.suppress_errors = True - pipe.transformer.to(memory_format=torch.channels_last) - pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True) - elif compile == "onediff": - from onediffx import compile_pipe - os.environ['NEXFORT_FX_FORCE_TRITON_SDPA'] = '1' + # if compile == "torch": + # torch._dynamo.config.suppress_errors = True + # pipe.transformer.to(memory_format=torch.channels_last) + # pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True) + # elif compile == "onediff": + # from onediffx import compile_pipe + # os.environ['NEXFORT_FX_FORCE_TRITON_SDPA'] = '1' - pipe = compile_pipe( - pipe, - backend="nexfort", - options= {"mode": "max-optimize:max-autotune:max-autotune", "memory_format": "channels_last", "options": {"inductor.optimize_linear_epilogue": False, "triton.fuse_attention_allow_fp16_reduction": False}}, - ignores=["vae"], - fuse_qkv_projections=True, - ) + # pipe = compile_pipe( + # pipe, + # backend="nexfort", + # options= {"mode": "max-optimize:max-autotune:max-autotune", "memory_format": "channels_last", "options": {"inductor.optimize_linear_epilogue": False, "triton.fuse_attention_allow_fp16_reduction": False}}, + # ignores=["vae"], + # fuse_qkv_projections=True, + # ) + + if enable_sequential_cpu_offload: + pipe.enable_sequential_cpu_offload() pipeline = { "pipe": pipe, "dtype": vae_dtype, "base_path": "Fun" if "fun" in model else "sad", "onediff": True if compile == "onediff" else False, - "cpu_offloading": False, + "cpu_offloading": enable_sequential_cpu_offload, "scheduler_config": scheduler_config } From 3c8e939f8edf1f51a6394b54faec9fc4341f1d95 Mon Sep 17 00:00:00 2001 From: kijai <40791699+kijai@users.noreply.github.com> Date: Sun, 22 Sep 2024 18:12:50 +0300 Subject: [PATCH 4/4] Add more schedulers for "fun" model --- nodes.py | 84 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 32 deletions(-) diff --git a/nodes.py b/nodes.py index 882c7b6..59d5621 100644 --- a/nodes.py +++ b/nodes.py @@ -3,7 +3,34 @@ import torch import folder_paths import comfy.model_management as mm from comfy.utils import ProgressBar, load_torch_file -from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler, DDIMScheduler, PNDMScheduler, DPMSolverMultistepScheduler, EulerDiscreteScheduler, EulerAncestralDiscreteScheduler +from diffusers.schedulers import ( + CogVideoXDDIMScheduler, + CogVideoXDPMScheduler, + DDIMScheduler, + PNDMScheduler, + DPMSolverMultistepScheduler, + EulerDiscreteScheduler, + EulerAncestralDiscreteScheduler, + UniPCMultistepScheduler, + HeunDiscreteScheduler, + SASolverScheduler, + DEISMultistepScheduler, + DDIMInverseScheduler + ) + +scheduler_mapping = { + "DPM++": DPMSolverMultistepScheduler, + "Euler": EulerDiscreteScheduler, + "Euler A": EulerAncestralDiscreteScheduler, + "PNDM": PNDMScheduler, + "DDIM": DDIMScheduler, + "CogVideoXDDIM": CogVideoXDDIMScheduler, + "CogVideoXDPMScheduler": CogVideoXDPMScheduler, + "SASolverScheduler": SASolverScheduler, + "UniPCMultistepScheduler": UniPCMultistepScheduler, + "HeunDiscreteScheduler": HeunDiscreteScheduler, + "DEISMultistepScheduler": DEISMultistepScheduler +} from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel from .pipeline_cogvideox import CogVideoXPipeline @@ -737,6 +764,10 @@ class CogVideoXFunSampler: "DPM++", "PNDM", "DDIM", + "SASolverScheduler", + "UniPCMultistepScheduler", + "HeunDiscreteScheduler", + "DEISMultistepScheduler", "CogVideoXDDIM", "CogVideoXDPMScheduler", ], @@ -787,21 +818,11 @@ class CogVideoXFunSampler: # Load Sampler scheduler_config = pipeline["scheduler_config"] - if scheduler == "DPM++": - noise_scheduler = DPMSolverMultistepScheduler.from_config(scheduler_config) - elif scheduler == "Euler": - noise_scheduler = EulerDiscreteScheduler.from_config(scheduler_config) - elif scheduler == "Euler A": - noise_scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler_config) - elif scheduler == "PNDM": - noise_scheduler = PNDMScheduler.from_config(scheduler_config) - elif scheduler == "DDIM": - noise_scheduler = DDIMScheduler.from_config(scheduler_config) - elif scheduler == "CogVideoXDDIM": - noise_scheduler = CogVideoXDDIMScheduler.from_config(scheduler_config) - elif scheduler == "CogVideoXDPMScheduler": - noise_scheduler = CogVideoXDPMScheduler.from_config(scheduler_config) - pipe.scheduler = noise_scheduler + if scheduler in scheduler_mapping: + noise_scheduler = scheduler_mapping[scheduler].from_config(scheduler_config) + pipe.scheduler = noise_scheduler + else: + raise ValueError(f"Unknown scheduler: {scheduler}") #if not pipeline["cpu_offloading"]: # pipe.transformer.to(device) @@ -865,6 +886,10 @@ class CogVideoXFunVid2VidSampler: "DPM++", "PNDM", "DDIM", + "SASolverScheduler", + "UniPCMultistepScheduler", + "HeunDiscreteScheduler", + "DEISMultistepScheduler", "CogVideoXDDIM", "CogVideoXDPMScheduler", ], @@ -887,8 +912,12 @@ class CogVideoXFunVid2VidSampler: offload_device = mm.unet_offload_device() pipe = pipeline["pipe"] dtype = pipeline["dtype"] + base_path = pipeline["base_path"] - pipe.enable_model_cpu_offload(device=device) + assert "Fun" in base_path, "'Unfun' models not supported in 'CogVideoXFunSampler', use the 'CogVideoSampler'" + + if not pipeline["cpu_offloading"]: + pipe.enable_model_cpu_offload(device=device) mm.soft_empty_cache() @@ -902,21 +931,12 @@ class CogVideoXFunVid2VidSampler: base_path = pipeline["base_path"] # Load Sampler - if scheduler == "DPM++": - noise_scheduler = DPMSolverMultistepScheduler.from_pretrained(base_path, subfolder= 'scheduler') - elif scheduler == "Euler": - noise_scheduler = EulerDiscreteScheduler.from_pretrained(base_path, subfolder= 'scheduler') - elif scheduler == "Euler A": - noise_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(base_path, subfolder= 'scheduler') - elif scheduler == "PNDM": - noise_scheduler = PNDMScheduler.from_pretrained(base_path, subfolder= 'scheduler') - elif scheduler == "DDIM": - noise_scheduler = DDIMScheduler.from_pretrained(base_path, subfolder= 'scheduler') - elif scheduler == "CogVideoXDDIM": - noise_scheduler = CogVideoXDDIMScheduler.from_pretrained(base_path, subfolder= 'scheduler') - elif scheduler == "CogVideoXDPMScheduler": - noise_scheduler = CogVideoXDPMScheduler.from_pretrained(base_path, subfolder= 'scheduler') - pipe.scheduler = noise_scheduler + scheduler_config = pipeline["scheduler_config"] + if scheduler in scheduler_mapping: + noise_scheduler = scheduler_mapping[scheduler].from_config(scheduler_config) + pipe.scheduler = noise_scheduler + else: + raise ValueError(f"Unknown scheduler: {scheduler}") generator= torch.Generator(device).manual_seed(seed)