From cf01dc2b0bf7d3daddfc7e61bfae10747e281d14 Mon Sep 17 00:00:00 2001 From: Kijai <40791699+kijai@users.noreply.github.com> Date: Wed, 28 Aug 2024 16:51:59 +0300 Subject: [PATCH] onediff support --- .../cogvideo_2b_vid2vid_test_example_02.json | 4 +-- examples/cogvideox_5b_example_01.json | 4 +-- nodes.py | 28 ++++++++++++++++--- readme.md | 11 ++++++++ 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/examples/cogvideo_2b_vid2vid_test_example_02.json b/examples/cogvideo_2b_vid2vid_test_example_02.json index e195575..f78505c 100644 --- a/examples/cogvideo_2b_vid2vid_test_example_02.json +++ b/examples/cogvideo_2b_vid2vid_test_example_02.json @@ -819,7 +819,7 @@ "frame_rate": 8, "loop_count": 0, "filename_prefix": "CogVideoX_vid2vid", - "format": "video/nvenc_h264-mp4", + "format": "video/h264-mp4", "pix_fmt": "yuv420p", "bitrate": 10, "megabit": true, @@ -833,7 +833,7 @@ "filename": "AnimateDiff_00001.mp4", "subfolder": "", "type": "temp", - "format": "video/nvenc_h264-mp4", + "format": "video/h264-mp4", "frame_rate": 8 } } diff --git a/examples/cogvideox_5b_example_01.json b/examples/cogvideox_5b_example_01.json index 50d0668..04fb30d 100644 --- a/examples/cogvideox_5b_example_01.json +++ b/examples/cogvideox_5b_example_01.json @@ -279,7 +279,7 @@ "frame_rate": 8, "loop_count": 0, "filename_prefix": "CogVideoX5B", - "format": "video/nvenc_h264-mp4", + "format": "video/h264-mp4", "pix_fmt": "yuv420p", "bitrate": 10, "megabit": true, @@ -293,7 +293,7 @@ "filename": "CogVideoX5B_00009.mp4", "subfolder": "", "type": "temp", - "format": "video/nvenc_h264-mp4", + "format": "video/h264-mp4", "frame_rate": 8 }, "muted": false diff --git a/nodes.py b/nodes.py index ae5d41a..33dbd42 100644 --- a/nodes.py +++ b/nodes.py @@ -6,11 +6,14 @@ from comfy.utils import ProgressBar from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel from .pipeline_cogvideox import 
CogVideoXPipeline +from contextlib import nullcontext + import logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') log = logging.getLogger(__name__) + class DownloadAndLoadCogVideoModel: @classmethod def INPUT_TYPES(s): @@ -30,6 +33,7 @@ class DownloadAndLoadCogVideoModel: ), "fp8_transformer": ("BOOLEAN", {"default": False, "tooltip": "cast the transformer to torch.float8_e4m3fn"}), "torch_compile": ("BOOLEAN", {"default": False, "tooltip": "use torch.compile to speed up inference, Linux only"}), + "onediff": ("BOOLEAN", {"default": False, "tooltip": "use onediff/nexfort to speed up inference, requires onediff installed (Linux only)"}), } } @@ -38,7 +42,7 @@ class DownloadAndLoadCogVideoModel: FUNCTION = "loadmodel" CATEGORY = "CogVideoWrapper" - def loadmodel(self, model, precision, fp8_transformer, torch_compile): + def loadmodel(self, model, precision, fp8_transformer, torch_compile, onediff): device = mm.get_torch_device() offload_device = mm.unet_offload_device() mm.soft_empty_cache() @@ -72,13 +76,26 @@ class DownloadAndLoadCogVideoModel: if torch_compile: torch._dynamo.config.suppress_errors = True - pipe.transformer.to(device).to(memory_format=torch.channels_last) + pipe.transformer.to(memory_format=torch.channels_last) pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True) + if onediff: + from onediffx import compile_pipe, quantize_pipe + options = None + pipe = compile_pipe( + pipe, + backend="nexfort", + options=options, + ignores=["vae"], + fuse_qkv_projections=True, + ) + + pipeline = { "pipe": pipe, "dtype": dtype, - "base_path": base_path + "base_path": base_path, + "onediff": onediff } return (pipeline,) @@ -253,7 +270,10 @@ class CogVideoSampler: pipe.scheduler = CogVideoXDDIMScheduler.from_pretrained(base_path, subfolder="scheduler") elif scheduler == "DPM": pipe.scheduler = CogVideoXDPMScheduler.from_pretrained(base_path, subfolder="scheduler") - with 
torch.autocast(mm.get_autocast_device(device)): + + autocastcondition = not pipeline["onediff"] + autocast_context = torch.autocast(mm.get_autocast_device(device)) if autocastcondition else nullcontext() + with autocast_context: latents = pipeline["pipe"]( num_inference_steps=steps, height = height, diff --git a/readme.md b/readme.md index a601208..c0f975f 100644 --- a/readme.md +++ b/readme.md @@ -1,5 +1,16 @@ # WORK IN PROGRESS +## Update 2 + +Added **experimental** support for onediff; this reduced sampling time by ~30% for me, reaching 4.23 it/s on 4090 with 49 frames. +This requires using Linux, torch 2.4.0, and installing onediff and nexfort: + +`pip install --pre onediff onediffx` + +`pip install nexfort` + +The first run will take around 5 minutes for the compilation. + ## Update 5b model is now also supported for basic text2vid: https://huggingface.co/THUDM/CogVideoX-5b