From 72c6ad996a2cacc8aa72ba5ab25765334a178e4b Mon Sep 17 00:00:00 2001 From: kijai <40791699+kijai@users.noreply.github.com> Date: Wed, 7 Aug 2024 12:01:10 +0300 Subject: [PATCH] Use main diffusers 0.30.0 --- examples/cogvideo_vid2vid_test_example_01.json | 4 ++-- examples/example_01.json | 2 +- nodes.py | 13 ++++++++----- readme.md | 4 +--- requirements.txt | 2 +- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/examples/cogvideo_vid2vid_test_example_01.json b/examples/cogvideo_vid2vid_test_example_01.json index fb4b6e5..2416c9d 100644 --- a/examples/cogvideo_vid2vid_test_example_01.json +++ b/examples/cogvideo_vid2vid_test_example_01.json @@ -130,7 +130,7 @@ "Node name for S&R": "CogVideoTextEncode" }, "widgets_values": [ - "bad quality video, blurry, messy" + "" ] }, { @@ -163,7 +163,7 @@ "Node name for S&R": "DownloadAndLoadCogVideoModel" }, "widgets_values": [ - "fp16" + "bf16" ] }, { diff --git a/examples/example_01.json b/examples/example_01.json index 1881508..29a854f 100644 --- a/examples/example_01.json +++ b/examples/example_01.json @@ -285,7 +285,7 @@ "Node name for S&R": "DownloadAndLoadCogVideoModel" }, "widgets_values": [ - "fp16" + "bf16" ] }, { diff --git a/nodes.py b/nodes.py index 9239618..dc3bf73 100644 --- a/nodes.py +++ b/nodes.py @@ -2,6 +2,7 @@ import os import torch import folder_paths import comfy.model_management as mm +from comfy.utils import ProgressBar from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel from .pipeline_cogvideox import CogVideoXPipeline @@ -24,7 +25,7 @@ class DownloadAndLoadCogVideoModel: "fp32", "bf16", ], - {"default": "fp16"}, + {"default": "bf16"}, ), }, } @@ -286,14 +287,16 @@ class CogVideoDecode: latents = 1 / vae.config.scaling_factor * latents frames = [] + pbar = ProgressBar(num_seconds) for i in range(num_seconds): - # Whether or not to clear fake context parallel cache - fake_cp = i + 1 < num_seconds start_frame, end_frame = (0, 3) if i == 0 else (2 * i + 1, 2 * i + 3) - current_frames = vae.decode(latents[:, :, start_frame:end_frame], fake_cp=fake_cp).sample + current_frames = vae.decode(latents[:, :, start_frame:end_frame]).sample frames.append(current_frames) - mm.soft_empty_cache() + + pbar.update(1) + vae.clear_fake_context_parallel_cache() vae.to(offload_device) + mm.soft_empty_cache() frames = torch.cat(frames, dim=2) video = pipeline["pipe"].video_processor.postprocess_video(video=frames, output_type="pt") diff --git a/readme.md b/readme.md index e2f3c01..57b6b9a 100644 --- a/readme.md +++ b/readme.md @@ -1,8 +1,6 @@ # WORK IN PROGRESS -Currently requires diffusers with PR: https://github.com/huggingface/diffusers/pull/9082 - -This is specified in requirements.txt +Requires diffusers 0.30.0 (this is specified in requirements.txt) https://github.com/user-attachments/assets/9e41f37b-2bb3-411c-81fa-e91b80da2559 diff --git a/requirements.txt b/requirements.txt index a3887f2..a4e18bd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ huggingface_hub -git+https://github.com/huggingface/diffusers.git@878f609aa5ce4a78fea0f048726889debde1d7e8#egg=diffusers \ No newline at end of file +diffusers>=0.30.0 \ No newline at end of file