Mirror of https://git.datalinker.icu/kijai/ComfyUI-CogVideoXWrapper.git (synced 2026-01-25 03:34:20 +08:00)
onediff support

Commit: cf01dc2b0b (parent: 4b7fdac56d)
```diff
@@ -819,7 +819,7 @@
 "frame_rate": 8,
 "loop_count": 0,
 "filename_prefix": "CogVideoX_vid2vid",
-"format": "video/nvenc_h264-mp4",
+"format": "video/h264-mp4",
 "pix_fmt": "yuv420p",
 "bitrate": 10,
 "megabit": true,
@@ -833,7 +833,7 @@
 "filename": "AnimateDiff_00001.mp4",
 "subfolder": "",
 "type": "temp",
-"format": "video/nvenc_h264-mp4",
+"format": "video/h264-mp4",
 "frame_rate": 8
 }
 }
```
```diff
@@ -279,7 +279,7 @@
 "frame_rate": 8,
 "loop_count": 0,
 "filename_prefix": "CogVideoX5B",
-"format": "video/nvenc_h264-mp4",
+"format": "video/h264-mp4",
 "pix_fmt": "yuv420p",
 "bitrate": 10,
 "megabit": true,
@@ -293,7 +293,7 @@
 "filename": "CogVideoX5B_00009.mp4",
 "subfolder": "",
 "type": "temp",
-"format": "video/nvenc_h264-mp4",
+"format": "video/h264-mp4",
 "frame_rate": 8
 },
 "muted": false
```
nodes.py (28 changed lines)
```diff
@@ -6,11 +6,14 @@ from comfy.utils import ProgressBar
 from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
 from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
 from .pipeline_cogvideox import CogVideoXPipeline
+from contextlib import nullcontext
+
+
 import logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 log = logging.getLogger(__name__)
 
 
 class DownloadAndLoadCogVideoModel:
     @classmethod
     def INPUT_TYPES(s):
```
```diff
@@ -30,6 +33,7 @@ class DownloadAndLoadCogVideoModel:
                 ),
                 "fp8_transformer": ("BOOLEAN", {"default": False, "tooltip": "cast the transformer to torch.float8_e4m3fn"}),
                 "torch_compile": ("BOOLEAN", {"default": False, "tooltip": "use torch.compile to speed up inference, Linux only"}),
+                "onediff": ("BOOLEAN", {"default": False, "tooltip": "use onediff/nexfort to speed up inference, requires onediff installed (Linux only)"}),
             }
         }
 
@@ -38,7 +42,7 @@ class DownloadAndLoadCogVideoModel:
     FUNCTION = "loadmodel"
     CATEGORY = "CogVideoWrapper"
 
-    def loadmodel(self, model, precision, fp8_transformer, torch_compile):
+    def loadmodel(self, model, precision, fp8_transformer, torch_compile, onediff):
         device = mm.get_torch_device()
         offload_device = mm.unet_offload_device()
         mm.soft_empty_cache()
```
```diff
@@ -72,13 +76,26 @@
 
         if torch_compile:
             torch._dynamo.config.suppress_errors = True
-            pipe.transformer.to(device).to(memory_format=torch.channels_last)
+            pipe.transformer.to(memory_format=torch.channels_last)
             pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
+
+        if onediff:
+            from onediffx import compile_pipe, quantize_pipe
+            options = None
+            pipe = compile_pipe(
+                pipe,
+                backend="nexfort",
+                options=options,
+                ignores=["vae"],
+                fuse_qkv_projections=True,
+            )
 
 
         pipeline = {
             "pipe": pipe,
             "dtype": dtype,
-            "base_path": base_path
+            "base_path": base_path,
+            "onediff": onediff
         }
 
         return (pipeline,)
```
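For context outside ComfyUI, here is a minimal sketch of what the new onediff branch does to a pipeline. It assumes a diffusers version recent enough to ship CogVideoXPipeline and uses the THUDM/CogVideoX-2b checkpoint; both stand in for the wrapper's own pipeline class and model loading, and are not part of this commit.

```python
# Minimal sketch, not the wrapper's code: compile a CogVideoX pipeline with
# onediff's nexfort backend, mirroring the compile_pipe() call in the diff.
import torch
from diffusers import CogVideoXPipeline  # assumption: recent diffusers release
from onediffx import compile_pipe

pipe = CogVideoXPipeline.from_pretrained(
    "THUDM/CogVideoX-2b",  # illustrative checkpoint, not from this commit
    torch_dtype=torch.bfloat16,
).to("cuda")

# Same arguments the node passes: skip the VAE and fuse the QKV projections.
pipe = compile_pipe(
    pipe,
    backend="nexfort",
    options=None,
    ignores=["vae"],
    fuse_qkv_projections=True,
)

# The first call triggers nexfort compilation (several minutes); later calls
# run at the compiled speed.
frames = pipe(prompt="a panda playing guitar", num_inference_steps=25).frames[0]
```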
```diff
@@ -253,7 +270,10 @@ class CogVideoSampler:
             pipe.scheduler = CogVideoXDDIMScheduler.from_pretrained(base_path, subfolder="scheduler")
         elif scheduler == "DPM":
             pipe.scheduler = CogVideoXDPMScheduler.from_pretrained(base_path, subfolder="scheduler")
-        with torch.autocast(mm.get_autocast_device(device)):
+
+        autocastcondition = not pipeline["onediff"]
+        autocast_context = torch.autocast(mm.get_autocast_device(device)) if autocastcondition else nullcontext()
+        with autocast_context:
             latents = pipeline["pipe"](
                 num_inference_steps=steps,
                 height = height,
```
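The change above swaps an unconditional torch.autocast block for one chosen at runtime: when onediff is enabled, autocast is skipped and contextlib.nullcontext() serves as the no-op stand-in. A minimal sketch of the pattern in isolation; run_step and model_fn are illustrative names, not part of the commit:

```python
# Conditional context manager: pick the real or no-op context up front so the
# body is written once instead of being duplicated in two with-blocks.
import torch
from contextlib import nullcontext

def run_step(model_fn, x, use_autocast: bool):
    ctx = torch.autocast("cuda") if use_autocast else nullcontext()
    with ctx:
        return model_fn(x)
```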
readme.md (11 changed lines)
```diff
@@ -1,5 +1,16 @@
 # WORK IN PROGRESS
+
+## Update 2
+
+Added **experimental** support for onediff; this reduced sampling time by ~30% for me, reaching 4.23 it/s on a 4090 with 49 frames.
+This requires Linux, torch 2.4.0, and installing onediff and nexfort:
+
+`pip install --pre onediff onediffx`
+
+`pip install nexfort`
+
+The first run will take around 5 minutes for the compilation.
 
 ## Update
 5b model is now also supported for basic text2vid: https://huggingface.co/THUDM/CogVideoX-5b
```
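Before flipping the new toggle, a quick preflight check along these lines can confirm the requirements from Update 2; treating onediff, onediffx, and nexfort as importable module names is an assumption drawn from the pip commands, not from this commit:

```python
# Hypothetical preflight check for the Update 2 requirements.
import importlib.util
import torch

print("torch:", torch.__version__)  # the readme calls for torch 2.4.0
for mod in ("onediff", "onediffx", "nexfort"):
    found = importlib.util.find_spec(mod) is not None
    print(f"{mod}: {'installed' if found else 'missing'}")
```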