Merge remote-tracking branch 'kijai/main'

# Conflicts:
#	nodes.py
This commit is contained in:
Phr00t 2024-09-22 13:11:20 -04:00
commit 0b4a6e31a8
4 changed files with 57 additions and 29 deletions

View File

@ -21,7 +21,6 @@ from typing import Callable, Dict, List, Optional, Tuple, Union
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
from einops import rearrange from einops import rearrange
from transformers import T5EncoderModel, T5Tokenizer
from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback
from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel

View File

@ -1,6 +1,4 @@
{ {
"_class_name": "CogVideoXTransformer3DModel",
"_diffusers_version": "0.30.0.dev0",
"activation_fn": "gelu-approximate", "activation_fn": "gelu-approximate",
"attention_bias": true, "attention_bias": true,
"attention_head_dim": 64, "attention_head_dim": 64,

View File

@ -1,6 +1,4 @@
{ {
"_class_name": "CogVideoXTransformer3DModel",
"_diffusers_version": "0.31.0.dev0",
"activation_fn": "gelu-approximate", "activation_fn": "gelu-approximate",
"attention_bias": true, "attention_bias": true,
"attention_head_dim": 64, "attention_head_dim": 64,

View File

@ -173,12 +173,15 @@ class DownloadAndLoadCogVideoGGUFModel:
"model": ( "model": (
[ [
"CogVideoX_5b_GGUF_Q4_0.safetensors", "CogVideoX_5b_GGUF_Q4_0.safetensors",
"CogVideoX_5b_I2V_GGUF_Q4_0.safetensors",
"CogVideoX_5b_fun_GGUF_Q4_0.safetensors", "CogVideoX_5b_fun_GGUF_Q4_0.safetensors",
#"CogVideoX_2b_fun_GGUF_Q4_0.safetensors"
], ],
), ),
"vae_precision": (["fp16", "fp32", "bf16"], {"default": "bf16", "tooltip": "VAE dtype"}), "vae_precision": (["fp16", "fp32", "bf16"], {"default": "bf16", "tooltip": "VAE dtype"}),
"fp8_fastmode": ("BOOLEAN", {"default": False, "tooltip": "only supported on 4090 and later GPUs"}), "fp8_fastmode": ("BOOLEAN", {"default": False, "tooltip": "only supported on 4090 and later GPUs"}),
"load_device": (["main_device", "offload_device"], {"default": "main_device"}), "load_device": (["main_device", "offload_device"], {"default": "main_device"}),
"enable_sequential_cpu_offload": ("BOOLEAN", {"default": False, "tooltip": "significantly reducing memory usage and slows down the inference"}),
}, },
} }
@ -187,7 +190,7 @@ class DownloadAndLoadCogVideoGGUFModel:
FUNCTION = "loadmodel" FUNCTION = "loadmodel"
CATEGORY = "CogVideoWrapper" CATEGORY = "CogVideoWrapper"
def loadmodel(self, model, vae_precision, fp8_fastmode, load_device): def loadmodel(self, model, vae_precision, fp8_fastmode, load_device, enable_sequential_cpu_offload):
device = mm.get_torch_device() device = mm.get_torch_device()
offload_device = mm.unet_offload_device() offload_device = mm.unet_offload_device()
mm.soft_empty_cache() mm.soft_empty_cache()
@ -198,20 +201,33 @@ class DownloadAndLoadCogVideoGGUFModel:
if not os.path.exists(gguf_path): if not os.path.exists(gguf_path):
gguf_path = os.path.join(download_path, model) gguf_path = os.path.join(download_path, model)
if not os.path.exists(gguf_path): if not os.path.exists(gguf_path):
if "I2V" in model:
repo_id = "Kijai/CogVideoX_GGUF"
else:
repo_id = "MinusZoneAI/ComfyUI-CogVideoX-MZ"
log.info(f"Downloading model to: {gguf_path}") log.info(f"Downloading model to: {gguf_path}")
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
snapshot_download( snapshot_download(
repo_id="MinusZoneAI/ComfyUI-CogVideoX-MZ", repo_id=repo_id,
allow_patterns=[f"*{model}*"], allow_patterns=[f"*{model}*"],
local_dir=download_path, local_dir=download_path,
local_dir_use_symlinks=False, local_dir_use_symlinks=False,
) )
if "5b" in model:
scheduler_path = os.path.join(script_directory, 'configs', 'scheduler_config_5b.json')
transformer_path = os.path.join(script_directory, 'configs', 'transformer_config_5b.json')
elif "2b" in model:
scheduler_path = os.path.join(script_directory, 'configs', 'scheduler_config_2b.json')
transformer_path = os.path.join(script_directory, 'configs', 'transformer_config_2b.json')
with open(transformer_path) as f:
transformer_config = json.load(f)
with open(os.path.join(script_directory, 'configs', 'transformer_config_5b.json')) as f:
transformer_config = json.load(f)
sd = load_torch_file(gguf_path) sd = load_torch_file(gguf_path)
#for key, value in sd.items():
# print(key, value.shape, value.dtype)
from . import mz_gguf_loader from . import mz_gguf_loader
import importlib import importlib
@ -221,11 +237,21 @@ class DownloadAndLoadCogVideoGGUFModel:
if "fun" in model: if "fun" in model:
transformer_config["in_channels"] = 33 transformer_config["in_channels"] = 33
transformer = CogVideoXTransformer3DModelFun.from_config(transformer_config) transformer = CogVideoXTransformer3DModelFun.from_config(transformer_config)
elif "I2V" in model:
transformer_config["in_channels"] = 32
transformer = CogVideoXTransformer3DModel.from_config(transformer_config)
else: else:
transformer_config["in_channels"] = 16 transformer_config["in_channels"] = 16
transformer = CogVideoXTransformer3DModel.from_config(transformer_config) transformer = CogVideoXTransformer3DModel.from_config(transformer_config)
transformer.to(torch.float8_e4m3fn) if "2b" in model:
for name, param in transformer.named_parameters():
if name != "pos_embedding":
param.data = param.data.to(torch.float8_e4m3fn)
else:
param.data = param.data.to(torch.float16)
else:
transformer.to(torch.float8_e4m3fn)
transformer = mz_gguf_loader.quantize_load_state_dict(transformer, sd, device="cpu") transformer = mz_gguf_loader.quantize_load_state_dict(transformer, sd, device="cpu")
if load_device == "offload_device": if load_device == "offload_device":
transformer.to(offload_device) transformer.to(offload_device)
@ -236,7 +262,7 @@ class DownloadAndLoadCogVideoGGUFModel:
from .fp8_optimization import convert_fp8_linear from .fp8_optimization import convert_fp8_linear
convert_fp8_linear(transformer, vae_dtype) convert_fp8_linear(transformer, vae_dtype)
scheduler_path = os.path.join(script_directory, 'configs', 'scheduler_config_5b.json')
with open(scheduler_path) as f: with open(scheduler_path) as f:
scheduler_config = json.load(f) scheduler_config = json.load(f)
@ -269,28 +295,31 @@ class DownloadAndLoadCogVideoGGUFModel:
pipe = CogVideoXPipeline(vae, transformer, scheduler) pipe = CogVideoXPipeline(vae, transformer, scheduler)
# compilation # compilation
if compile == "torch": # if compile == "torch":
torch._dynamo.config.suppress_errors = True # torch._dynamo.config.suppress_errors = True
pipe.transformer.to(memory_format=torch.channels_last) # pipe.transformer.to(memory_format=torch.channels_last)
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True) # pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
elif compile == "onediff": # elif compile == "onediff":
from onediffx import compile_pipe # from onediffx import compile_pipe
os.environ['NEXFORT_FX_FORCE_TRITON_SDPA'] = '1' # os.environ['NEXFORT_FX_FORCE_TRITON_SDPA'] = '1'
pipe = compile_pipe( # pipe = compile_pipe(
pipe, # pipe,
backend="nexfort", # backend="nexfort",
options= {"mode": "max-optimize:max-autotune:max-autotune", "memory_format": "channels_last", "options": {"inductor.optimize_linear_epilogue": False, "triton.fuse_attention_allow_fp16_reduction": False}}, # options= {"mode": "max-optimize:max-autotune:max-autotune", "memory_format": "channels_last", "options": {"inductor.optimize_linear_epilogue": False, "triton.fuse_attention_allow_fp16_reduction": False}},
ignores=["vae"], # ignores=["vae"],
fuse_qkv_projections=True, # fuse_qkv_projections=True,
) # )
if enable_sequential_cpu_offload:
pipe.enable_sequential_cpu_offload()
pipeline = { pipeline = {
"pipe": pipe, "pipe": pipe,
"dtype": vae_dtype, "dtype": vae_dtype,
"base_path": "Fun" if "fun" in model else "sad", "base_path": "Fun" if "fun" in model else "sad",
"onediff": True if compile == "onediff" else False, "onediff": True if compile == "onediff" else False,
"cpu_offloading": False, "cpu_offloading": enable_sequential_cpu_offload,
"scheduler_config": scheduler_config "scheduler_config": scheduler_config
} }
@ -853,8 +882,12 @@ class CogVideoXFunVid2VidSampler:
offload_device = mm.unet_offload_device() offload_device = mm.unet_offload_device()
pipe = pipeline["pipe"] pipe = pipeline["pipe"]
dtype = pipeline["dtype"] dtype = pipeline["dtype"]
base_path = pipeline["base_path"]
pipe.enable_model_cpu_offload(device=device) assert "Fun" in base_path, "'Unfun' models not supported in 'CogVideoXFunSampler', use the 'CogVideoSampler'"
if not pipeline["cpu_offloading"]:
pipe.enable_model_cpu_offload(device=device)
mm.soft_empty_cache() mm.soft_empty_cache()