mirror of
https://git.datalinker.icu/kijai/ComfyUI-CogVideoXWrapper.git
synced 2026-05-02 00:59:11 +08:00
onediff support
This commit is contained in:
parent
4b7fdac56d
commit
cf01dc2b0b
@ -819,7 +819,7 @@
|
|||||||
"frame_rate": 8,
|
"frame_rate": 8,
|
||||||
"loop_count": 0,
|
"loop_count": 0,
|
||||||
"filename_prefix": "CogVideoX_vid2vid",
|
"filename_prefix": "CogVideoX_vid2vid",
|
||||||
"format": "video/nvenc_h264-mp4",
|
"format": "video/h264-mp4",
|
||||||
"pix_fmt": "yuv420p",
|
"pix_fmt": "yuv420p",
|
||||||
"bitrate": 10,
|
"bitrate": 10,
|
||||||
"megabit": true,
|
"megabit": true,
|
||||||
@ -833,7 +833,7 @@
|
|||||||
"filename": "AnimateDiff_00001.mp4",
|
"filename": "AnimateDiff_00001.mp4",
|
||||||
"subfolder": "",
|
"subfolder": "",
|
||||||
"type": "temp",
|
"type": "temp",
|
||||||
"format": "video/nvenc_h264-mp4",
|
"format": "video/h264-mp4",
|
||||||
"frame_rate": 8
|
"frame_rate": 8
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -279,7 +279,7 @@
|
|||||||
"frame_rate": 8,
|
"frame_rate": 8,
|
||||||
"loop_count": 0,
|
"loop_count": 0,
|
||||||
"filename_prefix": "CogVideoX5B",
|
"filename_prefix": "CogVideoX5B",
|
||||||
"format": "video/nvenc_h264-mp4",
|
"format": "video/h264-mp4",
|
||||||
"pix_fmt": "yuv420p",
|
"pix_fmt": "yuv420p",
|
||||||
"bitrate": 10,
|
"bitrate": 10,
|
||||||
"megabit": true,
|
"megabit": true,
|
||||||
@ -293,7 +293,7 @@
|
|||||||
"filename": "CogVideoX5B_00009.mp4",
|
"filename": "CogVideoX5B_00009.mp4",
|
||||||
"subfolder": "",
|
"subfolder": "",
|
||||||
"type": "temp",
|
"type": "temp",
|
||||||
"format": "video/nvenc_h264-mp4",
|
"format": "video/h264-mp4",
|
||||||
"frame_rate": 8
|
"frame_rate": 8
|
||||||
},
|
},
|
||||||
"muted": false
|
"muted": false
|
||||||
|
|||||||
28
nodes.py
28
nodes.py
@ -6,11 +6,14 @@ from comfy.utils import ProgressBar
|
|||||||
from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
|
from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
|
||||||
from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
|
from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
|
||||||
from .pipeline_cogvideox import CogVideoXPipeline
|
from .pipeline_cogvideox import CogVideoXPipeline
|
||||||
|
from contextlib import nullcontext
|
||||||
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class DownloadAndLoadCogVideoModel:
|
class DownloadAndLoadCogVideoModel:
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def INPUT_TYPES(s):
|
||||||
@ -30,6 +33,7 @@ class DownloadAndLoadCogVideoModel:
|
|||||||
),
|
),
|
||||||
"fp8_transformer": ("BOOLEAN", {"default": False, "tooltip": "cast the transformer to torch.float8_e4m3fn"}),
|
"fp8_transformer": ("BOOLEAN", {"default": False, "tooltip": "cast the transformer to torch.float8_e4m3fn"}),
|
||||||
"torch_compile": ("BOOLEAN", {"default": False, "tooltip": "use torch.compile to speed up inference, Linux only"}),
|
"torch_compile": ("BOOLEAN", {"default": False, "tooltip": "use torch.compile to speed up inference, Linux only"}),
|
||||||
|
"onediff": ("BOOLEAN", {"default": False, "tooltip": "use onediff/nexfort to speed up inference, requires onediff installed (Linux only)"}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -38,7 +42,7 @@ class DownloadAndLoadCogVideoModel:
|
|||||||
FUNCTION = "loadmodel"
|
FUNCTION = "loadmodel"
|
||||||
CATEGORY = "CogVideoWrapper"
|
CATEGORY = "CogVideoWrapper"
|
||||||
|
|
||||||
def loadmodel(self, model, precision, fp8_transformer, torch_compile):
|
def loadmodel(self, model, precision, fp8_transformer, torch_compile, onediff):
|
||||||
device = mm.get_torch_device()
|
device = mm.get_torch_device()
|
||||||
offload_device = mm.unet_offload_device()
|
offload_device = mm.unet_offload_device()
|
||||||
mm.soft_empty_cache()
|
mm.soft_empty_cache()
|
||||||
@ -72,13 +76,26 @@ class DownloadAndLoadCogVideoModel:
|
|||||||
|
|
||||||
if torch_compile:
|
if torch_compile:
|
||||||
torch._dynamo.config.suppress_errors = True
|
torch._dynamo.config.suppress_errors = True
|
||||||
pipe.transformer.to(device).to(memory_format=torch.channels_last)
|
pipe.transformer.to(memory_format=torch.channels_last)
|
||||||
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
|
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
|
||||||
|
|
||||||
|
if onediff:
|
||||||
|
from onediffx import compile_pipe, quantize_pipe
|
||||||
|
options = None
|
||||||
|
pipe = compile_pipe(
|
||||||
|
pipe,
|
||||||
|
backend="nexfort",
|
||||||
|
options=options,
|
||||||
|
ignores=["vae"],
|
||||||
|
fuse_qkv_projections=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
pipeline = {
|
pipeline = {
|
||||||
"pipe": pipe,
|
"pipe": pipe,
|
||||||
"dtype": dtype,
|
"dtype": dtype,
|
||||||
"base_path": base_path
|
"base_path": base_path,
|
||||||
|
"onediff": onediff
|
||||||
}
|
}
|
||||||
|
|
||||||
return (pipeline,)
|
return (pipeline,)
|
||||||
@ -253,7 +270,10 @@ class CogVideoSampler:
|
|||||||
pipe.scheduler = CogVideoXDDIMScheduler.from_pretrained(base_path, subfolder="scheduler")
|
pipe.scheduler = CogVideoXDDIMScheduler.from_pretrained(base_path, subfolder="scheduler")
|
||||||
elif scheduler == "DPM":
|
elif scheduler == "DPM":
|
||||||
pipe.scheduler = CogVideoXDPMScheduler.from_pretrained(base_path, subfolder="scheduler")
|
pipe.scheduler = CogVideoXDPMScheduler.from_pretrained(base_path, subfolder="scheduler")
|
||||||
with torch.autocast(mm.get_autocast_device(device)):
|
|
||||||
|
autocastcondition = not pipeline["onediff"]
|
||||||
|
autocast_context = torch.autocast(mm.get_autocast_device(device)) if autocastcondition else nullcontext()
|
||||||
|
with autocast_context:
|
||||||
latents = pipeline["pipe"](
|
latents = pipeline["pipe"](
|
||||||
num_inference_steps=steps,
|
num_inference_steps=steps,
|
||||||
height = height,
|
height = height,
|
||||||
|
|||||||
11
readme.md
11
readme.md
@ -1,5 +1,16 @@
|
|||||||
# WORK IN PROGRESS
|
# WORK IN PROGRESS
|
||||||
|
|
||||||
|
## Update 2
|
||||||
|
|
||||||
|
Added **experimental** support for onediff. This reduced sampling time by ~30% for me, reaching 4.23 it/s on a 4090 with 49 frames.
|
||||||
|
This requires Linux, torch 2.4.0, and both onediff and nexfort to be installed:
|
||||||
|
|
||||||
|
`pip install --pre onediff onediffx`
|
||||||
|
|
||||||
|
`pip install nexfort`
|
||||||
|
|
||||||
|
The first run will take around 5 minutes for compilation.
|
||||||
|
|
||||||
## Update
|
## Update
|
||||||
5b model is now also supported for basic text2vid: https://huggingface.co/THUDM/CogVideoX-5b
|
5b model is now also supported for basic text2vid: https://huggingface.co/THUDM/CogVideoX-5b
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user