Merge branch 'main' into add-5b

BrettMeirhofer 2024-08-27 09:25:52 -05:00 committed by GitHub
commit dc9de70259
4 changed files with 600 additions and 82 deletions

View File

@@ -0,0 +1,415 @@
{
"last_node_id": 33,
"last_link_id": 59,
"nodes": [
{
"id": 20,
"type": "CLIPLoader",
"pos": [
-59,
397
],
"size": {
"0": 451.30548095703125,
"1": 82
},
"flags": {},
"order": 0,
"mode": 0,
"outputs": [
{
"name": "CLIP",
"type": "CLIP",
"links": [
54,
56
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CLIPLoader"
},
"widgets_values": [
"t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
"sd3"
]
},
{
"id": 31,
"type": "CogVideoTextEncode",
"pos": [
503,
521
],
"size": {
"0": 463.01251220703125,
"1": 98.10446166992188
},
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 56
}
],
"outputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"links": [
57
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoTextEncode"
},
"widgets_values": [
""
]
},
{
"id": 11,
"type": "CogVideoDecode",
"pos": [
1140,
783
],
"size": {
"0": 210,
"1": 46
},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "COGVIDEOPIPE",
"link": 37
},
{
"name": "samples",
"type": "LATENT",
"link": 38
}
],
"outputs": [
{
"name": "images",
"type": "IMAGE",
"links": [
59
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoDecode"
}
},
{
"id": 1,
"type": "DownloadAndLoadCogVideoModel",
"pos": [
649,
182
],
"size": {
"0": 315,
"1": 82
},
"flags": {},
"order": 1,
"mode": 0,
"outputs": [
{
"name": "cogvideo_pipe",
"type": "COGVIDEOPIPE",
"links": [
36
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadCogVideoModel"
},
"widgets_values": [
"THUDM/CogVideoX-5b",
"bf16"
]
},
{
"id": 22,
"type": "CogVideoSampler",
"pos": [
1041,
342
],
"size": {
"0": 315,
"1": 382
},
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "COGVIDEOPIPE",
"link": 36
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 55,
"slot_index": 1
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 57
},
{
"name": "samples",
"type": "LATENT",
"link": null
}
],
"outputs": [
{
"name": "cogvideo_pipe",
"type": "COGVIDEOPIPE",
"links": [
37
],
"shape": 3
},
{
"name": "samples",
"type": "LATENT",
"links": [
38
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoSampler"
},
"widgets_values": [
480,
720,
49,
8,
50,
7,
806286757407563,
"fixed",
"DPM",
49,
8,
1
]
},
{
"id": 33,
"type": "VHS_VideoCombine",
"pos": [
1533,
136
],
"size": [
778.7022705078125,
853.801513671875
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 59
},
{
"name": "audio",
"type": "AUDIO",
"link": null
},
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null
},
{
"name": "vae",
"type": "VAE",
"link": null
}
],
"outputs": [
{
"name": "Filenames",
"type": "VHS_FILENAMES",
"links": null,
"shape": 3
}
],
"properties": {
"Node name for S&R": "VHS_VideoCombine"
},
"widgets_values": {
"frame_rate": 8,
"loop_count": 0,
"filename_prefix": "CogVideoX5B",
"format": "video/nvenc_h264-mp4",
"pix_fmt": "yuv420p",
"bitrate": 10,
"megabit": true,
"save_metadata": true,
"pingpong": false,
"save_output": false,
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "CogVideoX5B.mp4",
"subfolder": "",
"type": "temp",
"format": "video/nvenc_h264-mp4",
"frame_rate": 8
},
"muted": false
}
}
},
{
"id": 30,
"type": "CogVideoTextEncode",
"pos": [
500,
308
],
"size": {
"0": 474.8450012207031,
"1": 164.7423553466797
},
"flags": {},
"order": 2,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 54
}
],
"outputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"links": [
55
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoTextEncode"
},
"widgets_values": [
"The camera follows behind a white vintage SUV with a black roof rack as it speeds up a steep dirt road surrounded by pine trees on a steep mountain slope, dust kicks up from its tires, the sunlight shines on the SUV as it speeds along the dirt road, casting a warm glow over the scene. The dirt road curves gently into the distance, with no other cars or vehicles in sight. The trees on either side of the road are redwoods, with patches of greenery scattered throughout. The car is seen from the rear following the curve with ease, making it seem as if it is on a rugged drive through the rugged terrain. The dirt road itself is surrounded by steep hills and mountains, with a clear blue sky above with wispy clouds.\n"
]
}
],
"links": [
[
36,
1,
0,
22,
0,
"COGVIDEOPIPE"
],
[
37,
22,
0,
11,
0,
"COGVIDEOPIPE"
],
[
38,
22,
1,
11,
1,
"LATENT"
],
[
54,
20,
0,
30,
0,
"CLIP"
],
[
55,
30,
0,
22,
1,
"CONDITIONING"
],
[
56,
20,
0,
31,
0,
"CLIP"
],
[
57,
31,
0,
22,
2,
"CONDITIONING"
],
[
59,
11,
0,
33,
0,
"IMAGE"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.7513148009015782,
"offset": [
106.37225000664994,
78.14886929032406
]
}
},
"version": 0.4
}
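The workflow above is stored in ComfyUI's graph format (a list of nodes plus flat link records). For reference, a minimal sketch of inspecting it offline with plain Python; the file name used here is an assumption for illustration, not part of this commit:

import json

# Hypothetical path; point it at wherever the workflow JSON above is saved.
with open("cogvideox_5b_example.json", "r", encoding="utf-8") as f:
    workflow = json.load(f)

# Each node carries its type and the widget values shown above.
for node in workflow["nodes"]:
    print(node["id"], node["type"], node.get("widgets_values"))

# Each link is [link_id, from_node, from_slot, to_node, to_slot, type];
# resolving the ids gives a readable picture of the graph wiring.
node_types = {node["id"]: node["type"] for node in workflow["nodes"]}
for link_id, src, src_slot, dst, dst_slot, link_type in workflow["links"]:
    print(f"{node_types[src]}[{src_slot}] --{link_type}--> {node_types[dst]}[{dst_slot}]")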

View File

@@ -48,7 +48,10 @@ class DownloadAndLoadCogVideoModel:
         dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[precision]
-        base_path = os.path.join(folder_paths.models_dir, "CogVideo", "CogVideo2B")
+        if "2b" in model:
+            base_path = os.path.join(folder_paths.models_dir, "CogVideo", "CogVideo2B")
+        elif "5b" in model:
+            base_path = os.path.join(folder_paths.models_dir, "CogVideo", "CogVideoX-5b")
         if not os.path.exists(base_path):
             log.info(f"Downloading model to: {base_path}")
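The download branch itself sits outside this hunk. A minimal sketch of the selection-plus-download flow, assuming huggingface_hub's snapshot_download is the underlying mechanism (huggingface_hub is listed in requirements.txt); the function and variable names are illustrative, not the wrapper's exact code:

import os
from huggingface_hub import snapshot_download

def resolve_cogvideo_path(model: str, models_dir: str) -> str:
    # Mirrors the branching added above: one local folder per model variant.
    if "2b" in model:
        base_path = os.path.join(models_dir, "CogVideo", "CogVideo2B")
    elif "5b" in model:
        base_path = os.path.join(models_dir, "CogVideo", "CogVideoX-5b")
    else:
        raise ValueError(f"Unknown CogVideo variant: {model}")
    # Assumes the node's model string ("THUDM/CogVideoX-5b") doubles as the Hugging Face repo id.
    if not os.path.exists(base_path):
        snapshot_download(repo_id=model, local_dir=base_path)
    return base_path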
@@ -205,14 +208,14 @@ class CogVideoSampler:
                 "negative": ("CONDITIONING", ),
                 "height": ("INT", {"default": 480, "min": 128, "max": 2048, "step": 8}),
                 "width": ("INT", {"default": 720, "min": 128, "max": 2048, "step": 8}),
-                "num_frames": ("INT", {"default": 48, "min": 8, "max": 1024, "step": 8}),
+                "num_frames": ("INT", {"default": 48, "min": 8, "max": 1024, "step": 1}),
                 "fps": ("INT", {"default": 8, "min": 1, "max": 100, "step": 1}),
                 "steps": ("INT", {"default": 25, "min": 1}),
                 "cfg": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 30.0, "step": 0.01}),
                 "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                 "scheduler": (["DDIM", "DPM"],),
-                "t_tile_length": ("INT", {"default": 16, "min": 16, "max": 128, "step": 4}),
-                "t_tile_overlap": ("INT", {"default": 8, "min": 8, "max": 128, "step": 2}),
+                "t_tile_length": ("INT", {"default": 16, "min": 2, "max": 128, "step": 1}),
+                "t_tile_overlap": ("INT", {"default": 8, "min": 2, "max": 128, "step": 1}),
             },
             "optional": {
                 "samples": ("LATENT", ),
@@ -282,10 +285,10 @@ class CogVideoDecode:
     RETURN_TYPES = ("IMAGE",)
     RETURN_NAMES = ("images",)
-    FUNCTION = "process"
+    FUNCTION = "decode"
     CATEGORY = "CogVideoWrapper"

-    def process(self, pipeline, samples):
+    def decode(self, pipeline, samples):
         device = mm.get_torch_device()
         offload_device = mm.unet_offload_device()
         latents = samples["samples"]
@@ -305,19 +308,20 @@ class CogVideoDecode:
         frames = []
         pbar = ProgressBar(num_seconds)
-        for i in range(num_seconds):
-            start_frame, end_frame = (0, 3) if i == 0 else (2 * i + 1, 2 * i + 3)
-            current_frames = vae.decode(latents[:, :, start_frame:end_frame]).sample
-            frames.append(current_frames)
-            pbar.update(1)
-        vae.clear_fake_context_parallel_cache()
+        # for i in range(num_seconds):
+        #     start_frame, end_frame = (0, 3) if i == 0 else (2 * i + 1, 2 * i + 3)
+        #     current_frames = vae.decode(latents[:, :, start_frame:end_frame]).sample
+        #     frames.append(current_frames)
+        #     pbar.update(1)
+        frames = vae.decode(latents).sample
         vae.to(offload_device)
         mm.soft_empty_cache()
-        frames = torch.cat(frames, dim=2)
+        #frames = torch.cat(frames, dim=2)
         video = pipeline["pipe"].video_processor.postprocess_video(video=frames, output_type="pt")
         video = video[0].permute(0, 2, 3, 1).cpu().float()
+        print(video.min(), video.max())
         return (video,)
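The rewrite above drops the per-second chunked decode and runs a single vae.decode over all latent frames; the trailing permute converts the post-processed video into ComfyUI's IMAGE layout. A standalone sketch of that last reshaping step, using a dummy tensor whose shape is an assumption for illustration:

import torch

# Assume postprocess_video(..., output_type="pt") yields [batch, frames, channels, height, width].
video = torch.rand(1, 49, 3, 480, 720)

# Same operation as the node: take the first batch element and move channels last,
# giving [frames, height, width, channels], the layout ComfyUI IMAGE batches use.
images = video[0].permute(0, 2, 3, 1).cpu().float()
print(images.shape)  # torch.Size([49, 480, 720, 3])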

View File

@@ -17,6 +17,7 @@ import inspect
 from typing import Callable, Dict, List, Optional, Tuple, Union

 import torch
+import math

 from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
 from diffusers.pipelines.pipeline_utils import DiffusionPipeline
@@ -24,11 +25,29 @@ from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
 from diffusers.utils import logging
 from diffusers.utils.torch_utils import randn_tensor
 from diffusers.video_processor import VideoProcessor
+from diffusers.models.embeddings import get_3d_rotary_pos_embed

 from comfy.utils import ProgressBar

 logger = logging.get_logger(__name__) # pylint: disable=invalid-name

+def get_resize_crop_region_for_grid(src, tgt_width, tgt_height):
+    tw = tgt_width
+    th = tgt_height
+    h, w = src
+    r = h / w
+    if r > (th / tw):
+        resize_height = th
+        resize_width = int(round(th / h * w))
+    else:
+        resize_width = tw
+        resize_height = int(round(tw / w * h))
+
+    crop_top = int(round((th - resize_height) / 2.0))
+    crop_left = int(round((tw - resize_width) / 2.0))
+
+    return (crop_top, crop_left), (crop_top + resize_height, crop_left + resize_width)
+
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps
 def retrieve_timesteps(
     scheduler,
@@ -229,6 +248,46 @@ class CogVideoXPipeline(DiffusionPipeline):
         weights = weights.unsqueeze(0).unsqueeze(2).unsqueeze(3).unsqueeze(4).repeat(1, t_batch_size,1, 1, 1)
         return weights

+    def fuse_qkv_projections(self) -> None:
+        r"""Enables fused QKV projections."""
+        self.fusing_transformer = True
+        self.transformer.fuse_qkv_projections()
+
+    def unfuse_qkv_projections(self) -> None:
+        r"""Disable QKV projection fusion if enabled."""
+        if not self.fusing_transformer:
+            logger.warning("The Transformer was not initially fused for QKV projections. Doing nothing.")
+        else:
+            self.transformer.unfuse_qkv_projections()
+            self.fusing_transformer = False
+
+    def _prepare_rotary_positional_embeddings(
+        self,
+        height: int,
+        width: int,
+        num_frames: int,
+        device: torch.device,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        grid_height = height // (self.vae_scale_factor_spatial * self.transformer.config.patch_size)
+        grid_width = width // (self.vae_scale_factor_spatial * self.transformer.config.patch_size)
+        base_size_width = 720 // (self.vae_scale_factor_spatial * self.transformer.config.patch_size)
+        base_size_height = 480 // (self.vae_scale_factor_spatial * self.transformer.config.patch_size)
+
+        grid_crops_coords = get_resize_crop_region_for_grid(
+            (grid_height, grid_width), base_size_width, base_size_height
+        )
+        freqs_cos, freqs_sin = get_3d_rotary_pos_embed(
+            embed_dim=self.transformer.config.attention_head_dim,
+            crops_coords=grid_crops_coords,
+            grid_size=(grid_height, grid_width),
+            temporal_size=num_frames,
+            use_real=True,
+        )
+
+        freqs_cos = freqs_cos.to(device=device)
+        freqs_sin = freqs_sin.to(device=device)
+        return freqs_cos, freqs_sin
+
     @property
     def guidance_scale(self):
         return self._guidance_scale
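For intuition, a worked example of the grid math used by _prepare_rotary_positional_embeddings above, assuming CogVideoX's usual spatial VAE scale factor of 8 and transformer patch size of 2 (neither value appears in this diff):

# 480x720 output, the base resolution the rotary embeddings are defined against.
vae_scale_factor_spatial, patch_size = 8, 2  # assumed defaults, not taken from this diff
grid_height = 480 // (vae_scale_factor_spatial * patch_size)       # 30
grid_width = 720 // (vae_scale_factor_spatial * patch_size)        # 45
base_size_height = 480 // (vae_scale_factor_spatial * patch_size)  # 30
base_size_width = 720 // (vae_scale_factor_spatial * patch_size)   # 45

# get_resize_crop_region_for_grid((30, 45), 45, 30) returns ((0, 0), (30, 45)):
# at the native 480x720 resolution the crop window covers the whole grid, so no
# positional cropping happens; other aspect ratios yield a centered crop region.
print(grid_height, grid_width)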
@@ -374,6 +433,15 @@ class CogVideoXPipeline(DiffusionPipeline):
         t_tile_weights = self._gaussian_weights(t_tile_length=t_tile_length, t_batch_size=1).to(latents.device).to(latents.dtype)
         print("latents.shape", latents.shape)
         print("latents.device", latents.device)

+        # 6.5. Create rotary embeds if required
+        image_rotary_emb = (
+            self._prepare_rotary_positional_embeddings(height, width, latents.size(1), device)
+            if self.transformer.config.use_rotary_positional_embeddings
+            else None
+        )
+
         # 7. Denoising loop
         num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
         comfy_pbar = ProgressBar(num_inference_steps)
@@ -383,94 +451,125 @@ class CogVideoXPipeline(DiffusionPipeline):
             for i, t in enumerate(timesteps):
                 if self.interrupt:
                     continue

-                #temporal tiling code based on https://github.com/mayuelala/FollowYourEmoji/blob/main/models/video_pipeline.py
-                # =====================================================
-                grid_ts = 0
-                cur_t = 0
-                while cur_t < latents.shape[1]:
-                    cur_t = max(grid_ts * t_tile_length - t_tile_overlap * grid_ts, 0) + t_tile_length
-                    grid_ts += 1
-
-                all_t = latents.shape[1]
-                latents_all_list = []
-                # =====================================================
-
-                for t_i in range(grid_ts):
-                    if t_i < grid_ts - 1:
-                        ofs_t = max(t_i * t_tile_length - t_tile_overlap * t_i, 0)
-                    if t_i == grid_ts - 1:
-                        ofs_t = all_t - t_tile_length
-
-                    input_start_t = ofs_t
-                    input_end_t = ofs_t + t_tile_length
-
-                    #latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
-                    #latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
-
-                    latents_tile = latents[:, input_start_t:input_end_t,:, :, :]
-                    latent_model_input_tile = torch.cat([latents_tile] * 2) if do_classifier_free_guidance else latents_tile
-                    latent_model_input_tile = self.scheduler.scale_model_input(latent_model_input_tile, t)
-
-                    #t_input = t[None].to(device)
-                    t_input = t.expand(latent_model_input_tile.shape[0]) # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
-
-                    # predict noise model_output
-                    noise_pred = self.transformer(
-                        hidden_states=latent_model_input_tile,
-                        encoder_hidden_states=prompt_embeds,
-                        timestep=t_input,
-                        return_dict=False,
-                    )[0]
-                    noise_pred = noise_pred.float()
-
-                    if self.do_classifier_free_guidance:
-                        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-                        noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
-
-                    # compute the previous noisy sample x_t -> x_t-1
-                    if not isinstance(self.scheduler, CogVideoXDPMScheduler):
-                        latents_tile = self.scheduler.step(noise_pred, t, latents_tile, **extra_step_kwargs, return_dict=False)[0]
-                    else:
-                        raise NotImplementedError("DPM is not supported with temporal tiling")
-                    # else:
-                    #     latents_tile, old_pred_original_sample = self.scheduler.step(
-                    #         noise_pred,
-                    #         old_pred_original_sample,
-                    #         t,
-                    #         t_input[t_i - 1] if t_i > 0 else None,
-                    #         latents_tile,
-                    #         **extra_step_kwargs,
-                    #         return_dict=False,
-                    #     )
-                    latents_all_list.append(latents_tile)
-
-                # ==========================================
-                latents_all = torch.zeros(latents.shape, device=latents.device, dtype=latents.dtype)
-                contributors = torch.zeros(latents.shape, device=latents.device, dtype=latents.dtype)
-                # Add each tile contribution to overall latents
-                for t_i in range(grid_ts):
-                    if t_i < grid_ts - 1:
-                        ofs_t = max(t_i * t_tile_length - t_tile_overlap * t_i, 0)
-                    if t_i == grid_ts - 1:
-                        ofs_t = all_t - t_tile_length
-
-                    input_start_t = ofs_t
-                    input_end_t = ofs_t + t_tile_length
-
-                    latents_all[:, input_start_t:input_end_t,:, :, :] += latents_all_list[t_i] * t_tile_weights
-                    contributors[:, input_start_t:input_end_t,:, :, :] += t_tile_weights
-
-                latents_all /= contributors
-                latents = latents_all
-                # ==========================================
-
-                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
-                    progress_bar.update()
-                    comfy_pbar.update(1)
+                if not isinstance(self.scheduler, CogVideoXDPMScheduler):
+                    #temporal tiling code based on https://github.com/mayuelala/FollowYourEmoji/blob/main/models/video_pipeline.py
+                    # =====================================================
+                    grid_ts = 0
+                    cur_t = 0
+                    while cur_t < latents.shape[1]:
+                        cur_t = max(grid_ts * t_tile_length - t_tile_overlap * grid_ts, 0) + t_tile_length
+                        grid_ts += 1
+
+                    all_t = latents.shape[1]
+                    latents_all_list = []
+                    # =====================================================
+
+                    for t_i in range(grid_ts):
+                        if t_i < grid_ts - 1:
+                            ofs_t = max(t_i * t_tile_length - t_tile_overlap * t_i, 0)
+                        if t_i == grid_ts - 1:
+                            ofs_t = all_t - t_tile_length
+
+                        input_start_t = ofs_t
+                        input_end_t = ofs_t + t_tile_length
+
+                        #latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
+                        #latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+                        latents_tile = latents[:, input_start_t:input_end_t,:, :, :]
+                        latent_model_input_tile = torch.cat([latents_tile] * 2) if do_classifier_free_guidance else latents_tile
+                        latent_model_input_tile = self.scheduler.scale_model_input(latent_model_input_tile, t)
+
+                        #t_input = t[None].to(device)
+                        t_input = t.expand(latent_model_input_tile.shape[0]) # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
+
+                        # predict noise model_output
+                        noise_pred = self.transformer(
+                            hidden_states=latent_model_input_tile,
+                            encoder_hidden_states=prompt_embeds,
+                            timestep=t_input,
+                            image_rotary_emb=image_rotary_emb,
+                            return_dict=False,
+                        )[0]
+                        noise_pred = noise_pred.float()
+
+                        if self.do_classifier_free_guidance:
+                            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+                            noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+                        # compute the previous noisy sample x_t -> x_t-1
+                        latents_tile = self.scheduler.step(noise_pred, t, latents_tile, **extra_step_kwargs, return_dict=False)[0]
+                        latents_all_list.append(latents_tile)
+
+                    # ==========================================
+                    latents_all = torch.zeros(latents.shape, device=latents.device, dtype=latents.dtype)
+                    contributors = torch.zeros(latents.shape, device=latents.device, dtype=latents.dtype)
+                    # Add each tile contribution to overall latents
+                    for t_i in range(grid_ts):
+                        if t_i < grid_ts - 1:
+                            ofs_t = max(t_i * t_tile_length - t_tile_overlap * t_i, 0)
+                        if t_i == grid_ts - 1:
+                            ofs_t = all_t - t_tile_length
+
+                        input_start_t = ofs_t
+                        input_end_t = ofs_t + t_tile_length
+
+                        latents_all[:, input_start_t:input_end_t,:, :, :] += latents_all_list[t_i] * t_tile_weights
+                        contributors[:, input_start_t:input_end_t,:, :, :] += t_tile_weights
+
+                    latents_all /= contributors
+                    latents = latents_all
+
+                    if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+                        progress_bar.update()
+                        comfy_pbar.update(1)
+                    # ==========================================
+                else:
+                    latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+                    latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+                    # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
+                    timestep = t.expand(latent_model_input.shape[0])
+
+                    # predict noise model_output
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        image_rotary_emb=image_rotary_emb,
+                        return_dict=False,
+                    )[0]
+                    noise_pred = noise_pred.float()
+
+                    self._guidance_scale = 1 + guidance_scale * (
+                        (1 - math.cos(math.pi * ((num_inference_steps - t.item()) / num_inference_steps) ** 5.0)) / 2
+                    )
+                    if do_classifier_free_guidance:
+                        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+                        noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+                    # compute the previous noisy sample x_t -> x_t-1
+                    if not isinstance(self.scheduler, CogVideoXDPMScheduler):
+                        latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+                    else:
+                        latents, old_pred_original_sample = self.scheduler.step(
+                            noise_pred,
+                            old_pred_original_sample,
+                            t,
+                            timesteps[i - 1] if i > 0 else None,
+                            latents,
+                            **extra_step_kwargs,
+                            return_dict=False,
+                        )
+                    latents = latents.to(prompt_embeds.dtype)
+
+                    if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+                        progress_bar.update()
+                        comfy_pbar.update(1)

         # Offload all models
         self.maybe_free_model_hooks()
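The DDIM branch above splits the latent sequence into overlapping temporal tiles, denoises each tile, and blends them back using Gaussian weights from self._gaussian_weights (defined earlier in the file and unchanged here). A standalone sketch of the count-and-blend scheme on dummy tensors, with a simple window standing in for the pipeline's Gaussian weights:

import torch

def count_temporal_tiles(num_latent_frames: int, t_tile_length: int, t_tile_overlap: int) -> int:
    # Same loop as the diff: tiles advance by (t_tile_length - t_tile_overlap) frames.
    grid_ts, cur_t = 0, 0
    while cur_t < num_latent_frames:
        cur_t = max(grid_ts * t_tile_length - t_tile_overlap * grid_ts, 0) + t_tile_length
        grid_ts += 1
    return grid_ts

def blend_tiles(latents: torch.Tensor, tiles: list, t_tile_length: int, t_tile_overlap: int) -> torch.Tensor:
    # latents: [B, T, C, H, W]; tiles[i]: [B, t_tile_length, C, H, W] after denoising.
    all_t = latents.shape[1]
    # Stand-in for _gaussian_weights: any positive per-frame weighting works for the demo.
    weights = torch.hann_window(t_tile_length, periodic=False).clamp(min=1e-2).view(1, -1, 1, 1, 1)
    out = torch.zeros_like(latents)
    contributors = torch.zeros_like(latents)
    for t_i, tile in enumerate(tiles):
        ofs_t = max(t_i * t_tile_length - t_tile_overlap * t_i, 0)
        if t_i == len(tiles) - 1:  # last tile is anchored to the end, as in the diff
            ofs_t = all_t - t_tile_length
        out[:, ofs_t:ofs_t + t_tile_length] += tile * weights
        contributors[:, ofs_t:ofs_t + t_tile_length] += weights
    return out / contributors

# Example: 24 latent frames with tiles of length 16 and overlap 8 give 2 tiles;
# frames 8-15 are covered by both tiles and end up weight-averaged.
latents = torch.rand(1, 24, 16, 60, 90)
n = count_temporal_tiles(24, 16, 8)
tiles = [torch.rand(1, 16, 16, 60, 90) for _ in range(n)]
print(n, blend_tiles(latents, tiles, 16, 8).shape)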

View File

@@ -1,2 +1,2 @@
 huggingface_hub
-diffusers>=0.30.0
+diffusers>=0.30.1