update

commit ca63f5dade
parent 184097e78e
@@ -571,11 +571,10 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
         # 2. Patch embedding
         p = self.config.patch_size
         p_t = self.config.patch_size_t

         # We know that the hidden states height and width will always be divisible by patch_size.
         # But, the number of frames may not be divisible by patch_size_t. So, we pad with the beginning frames.
         if p_t is not None:
-            remaining_frames = p_t - num_frames % p_t
+            remaining_frames = 0 if num_frames % 2 == 0 else 1
             first_frame = hidden_states[:, :1].repeat(1, 1 + remaining_frames, 1, 1, 1)
             hidden_states = torch.cat([first_frame, hidden_states[:, 1:]], dim=1)
-
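Note on this hunk: the old formula pads a full extra temporal patch (p_t frames) whenever num_frames is already divisible by p_t, while the new one hardcodes a temporal patch size of 2 and pads a single frame only for odd counts. A minimal standalone sketch of the two computations, assuming p_t = 2 (the value this branch appears to target); everything below is illustrative and not part of the commit:

# Sketch only: compares the old and new padding-count formulas, assuming p_t = 2.
def remaining_frames_old(num_frames, p_t=2):
    # Old formula: yields p_t (a full extra patch) even when num_frames
    # is already divisible by p_t.
    return p_t - num_frames % p_t

def remaining_frames_new(num_frames):
    # New formula: pads a single frame only when the frame count is odd.
    return 0 if num_frames % 2 == 0 else 1

for n in (48, 49):
    print(n, remaining_frames_old(n), remaining_frames_new(n))
# 48 -> old pads 2 frames, new pads 0; 49 -> both pad 1 frame.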
@@ -263,7 +263,8 @@ class DownloadAndLoadCogVideoModel:
             pipe.set_adapters(adapter_list, adapter_weights=adapter_weights)
             if fuse:
                 lora_scale = 1
-                if "dimensionx" in lora[-1]["path"].lower():
+                dimension_loras = ["orbit_left_lora", "dimensionx"] # for now dimensionx loras need scaling
+                if any(item in lora[-1]["path"].lower() for item in dimension_loras):
                     lora_scale = lora_scale / lora_rank
                 pipe.fuse_lora(lora_scale=lora_scale, components=["transformer"])
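Note on this hunk: the single "dimensionx" substring test becomes a membership check over a small list of LoRA name fragments that need rank-based down-scaling. A minimal sketch of that check with hypothetical file paths (the paths and the helper name are illustrative, not from the repository):

# Sketch only: the list-based substring check; example paths are made up.
dimension_loras = ["orbit_left_lora", "dimensionx"]

def needs_rank_scaling(path):
    # True if any known DimensionX-style LoRA name appears in the path.
    return any(item in path.lower() for item in dimension_loras)

print(needs_rank_scaling("loras/DimensionX_rotate.safetensors"))   # True
print(needs_rank_scaling("loras/orbit_left_lora_v2.safetensors"))  # True
print(needs_rank_scaling("loras/style_anime.safetensors"))         # False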
 nodes.py | 4 ++++
@@ -828,6 +828,10 @@ class CogVideoSampler:
             num_frames == 49 or
             context_options is not None
         ), "1.0 I2V model can only do 49 frames"
+        if image_cond_latents is not None:
+            assert "I2V" in pipeline.get("model_name", ""), "Image condition latents only supported for I2V models"
+        else:
+            assert "I2V" not in pipeline.get("model_name", ""), "Image condition latents required for I2V models"

         device = mm.get_torch_device()
         offload_device = mm.unet_offload_device()
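Note on this hunk: the added asserts make image_cond_latents and the model type mutually consistent, i.e., I2V checkpoints must receive image condition latents and other checkpoints must not. A minimal sketch of the guard using a stand-in pipeline dict (the helper name and model names are illustrative):

# Sketch only: the mutual-consistency guard, extracted as a helper.
def check_i2v_inputs(pipeline, image_cond_latents):
    if image_cond_latents is not None:
        assert "I2V" in pipeline.get("model_name", ""), \
            "Image condition latents only supported for I2V models"
    else:
        assert "I2V" not in pipeline.get("model_name", ""), \
            "Image condition latents required for I2V models"

check_i2v_inputs({"model_name": "CogVideoX-5b-I2V"}, image_cond_latents=object())  # passes
check_i2v_inputs({"model_name": "CogVideoX-5b"}, image_cond_latents=None)          # passes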