onediff support

2026-03-16 16:47:20 +08:00 · 2024-08-28 16:51:59 +03:00 · 2024-08-28 16:51:59 +03:00 · cf01dc2b0b
commit cf01dc2b0b
parent 4b7fdac56d
4 changed files with 39 additions and 8 deletions
--- a/examples/cogvideo_2b_vid2vid_test_example_02.json
+++ b/examples/cogvideo_2b_vid2vid_test_example_02.json
@ -819,7 +819,7 @@
        "frame_rate": 8,
        "loop_count": 0,
        "filename_prefix": "CogVideoX_vid2vid",
-        "format": "video/nvenc_h264-mp4",
+        "format": "video/h264-mp4",
        "pix_fmt": "yuv420p",
        "bitrate": 10,
        "megabit": true,
@ -833,7 +833,7 @@
            "filename": "AnimateDiff_00001.mp4",
            "subfolder": "",
            "type": "temp",
-            "format": "video/nvenc_h264-mp4",
+            "format": "video/h264-mp4",
            "frame_rate": 8
          }
        }
--- a/examples/cogvideox_5b_example_01.json
+++ b/examples/cogvideox_5b_example_01.json
@ -279,7 +279,7 @@
        "frame_rate": 8,
        "loop_count": 0,
        "filename_prefix": "CogVideoX5B",
-        "format": "video/nvenc_h264-mp4",
+        "format": "video/h264-mp4",
        "pix_fmt": "yuv420p",
        "bitrate": 10,
        "megabit": true,
@ -293,7 +293,7 @@
            "filename": "CogVideoX5B_00009.mp4",
            "subfolder": "",
            "type": "temp",
-            "format": "video/nvenc_h264-mp4",
+            "format": "video/h264-mp4",
            "frame_rate": 8
          },
          "muted": false
--- a/nodes.py
+++ b/nodes.py
@ -6,11 +6,14 @@ from comfy.utils import ProgressBar
 from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
 from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
 from .pipeline_cogvideox import CogVideoXPipeline
+from contextlib import nullcontext
+

 import logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 log = logging.getLogger(__name__)

+
 class DownloadAndLoadCogVideoModel:
    @classmethod
    def INPUT_TYPES(s):
@ -30,6 +33,7 @@ class DownloadAndLoadCogVideoModel:
                ),
                "fp8_transformer": ("BOOLEAN", {"default": False, "tooltip": "cast the transformer to torch.float8_e4m3fn"}),
                "torch_compile": ("BOOLEAN", {"default": False, "tooltip": "use torch.compile to speed up inference, Linux only"}),
+                "onediff": ("BOOLEAN", {"default": False, "tooltip": "use onediff/nexfort to speed up inference, requires onediff installed (Linux only)"}),
            }
        }

@ -38,7 +42,7 @@ class DownloadAndLoadCogVideoModel:
    FUNCTION = "loadmodel"
    CATEGORY = "CogVideoWrapper"

-    def loadmodel(self, model, precision, fp8_transformer, torch_compile):
+    def loadmodel(self, model, precision, fp8_transformer, torch_compile, onediff):
        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        mm.soft_empty_cache()
@ -72,13 +76,26 @@ class DownloadAndLoadCogVideoModel:

        if torch_compile:
            torch._dynamo.config.suppress_errors = True
-            pipe.transformer.to(device).to(memory_format=torch.channels_last)
+            pipe.transformer.to(memory_format=torch.channels_last)
            pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)

+        if onediff:
+            from onediffx import compile_pipe, quantize_pipe
+            options = None
+            pipe = compile_pipe(
+            pipe,
+            backend="nexfort",
+            options=options,
+            ignores=["vae"],
+            fuse_qkv_projections=True,
+            )
+
+
        pipeline = {
            "pipe": pipe,
            "dtype": dtype,
-            "base_path": base_path
+            "base_path": base_path,
+            "onediff": onediff
        }

        return (pipeline,)
@ -253,7 +270,10 @@ class CogVideoSampler:
            pipe.scheduler = CogVideoXDDIMScheduler.from_pretrained(base_path, subfolder="scheduler")
        elif scheduler == "DPM":
            pipe.scheduler = CogVideoXDPMScheduler.from_pretrained(base_path, subfolder="scheduler")
-        with torch.autocast(mm.get_autocast_device(device)):
+            
+        autocastcondition = not pipeline["onediff"]
+        autocast_context = torch.autocast(mm.get_autocast_device(device)) if autocastcondition else nullcontext()
+        with autocast_context:
            latents = pipeline["pipe"](
                num_inference_steps=steps,
                height = height,
--- a/readme.md
+++ b/readme.md
@ -1,5 +1,16 @@
 # WORK IN PROGRESS

+## Update 2
+
+Added **experimental** support for onediff. This reduced sampling time by ~30% for me, reaching 4.23 it/s on a 4090 with 49 frames.
+It requires Linux, torch 2.4.0, and both onediff and nexfort to be installed:
+
+`pip install --pre onediff onediffx`
+
+`pip install nexfort`
+
+The first run will take around 5 minutes because the model has to be compiled.
+
 ## Update
 5b model is now also supported for basic text2vid: https://huggingface.co/THUDM/CogVideoX-5b