Update workflows, fix controlnet

2026-07-31 14:05:41 +08:00 · 2024-11-19 15:23:38 +02:00 · 2024-11-19 15:23:38 +02:00 · 128f89c4d2
commit 128f89c4d2
parent a7646c0d6f
16 changed files with 6627 additions and 7832 deletions
--- a/custom_cogvideox_transformer_3d.py
+++ b/custom_cogvideox_transformer_3d.py
@ -610,29 +610,29 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
        if self.fastercache_counter >= self.fastercache_start_step + 3 and self.fastercache_counter % 5 !=0:
            # 3. Transformer blocks
            for i, block in enumerate(self.transformer_blocks):
-                    hidden_states, encoder_hidden_states = block(
+                hidden_states, encoder_hidden_states = block(
-                        hidden_states=hidden_states[:1],
+                    hidden_states=hidden_states[:1],
-                        encoder_hidden_states=encoder_hidden_states[:1],
+                    encoder_hidden_states=encoder_hidden_states[:1],
-                        temb=emb[:1],
+                    temb=emb[:1],
-                        image_rotary_emb=image_rotary_emb,
+                    image_rotary_emb=image_rotary_emb,
-                        video_flow_feature=video_flow_features[i][:1] if video_flow_features is not None else None,
+                    video_flow_feature=video_flow_features[i][:1] if video_flow_features is not None else None,
-                        fuser = self.fuser_list[i] if self.fuser_list is not None else None,
+                    fuser = self.fuser_list[i] if self.fuser_list is not None else None,
-                        block_use_fastercache = i <= self.fastercache_num_blocks_to_cache,
+                    block_use_fastercache = i <= self.fastercache_num_blocks_to_cache,
-                        fastercache_counter = self.fastercache_counter,
+                    fastercache_counter = self.fastercache_counter,
-                        fastercache_start_step = self.fastercache_start_step,
+                    fastercache_start_step = self.fastercache_start_step,
-                        fastercache_device = self.fastercache_device,
+                    fastercache_device = self.fastercache_device,
-                        attention_mode = self.attention_mode
+                    attention_mode = self.attention_mode
-                    )
+                )
-                    if (controlnet_states is not None) and (i < len(controlnet_states)):
+                if (controlnet_states is not None) and (i < len(controlnet_states)):
-                        controlnet_states_block = controlnet_states[i]
+                    controlnet_states_block = controlnet_states[i]
-                        controlnet_block_weight = 1.0
+                    controlnet_block_weight = 1.0
-                        if isinstance(controlnet_weights, (list, np.ndarray)) or torch.is_tensor(controlnet_weights):
+                    if isinstance(controlnet_weights, (list, np.ndarray)) or torch.is_tensor(controlnet_weights):
-                            controlnet_block_weight = controlnet_weights[i]
+                        controlnet_block_weight = controlnet_weights[i]
-                        elif isinstance(controlnet_weights, (float, int)):
+                    elif isinstance(controlnet_weights, (float, int)):
-                            controlnet_block_weight = controlnet_weights
+                        controlnet_block_weight = controlnet_weights
-                        hidden_states = hidden_states + controlnet_states_block * controlnet_block_weight
+                    hidden_states = hidden_states + controlnet_states_block * controlnet_block_weight
            if not self.config.use_rotary_positional_embeddings:
                # CogVideoX-2B
@ -698,15 +698,16 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
                #if has_nan:
                #    raise ValueError(f"block output hidden_states has nan: {has_nan}")
-            if (controlnet_states is not None) and (i < len(controlnet_states)):
+                #controlnet
-                controlnet_states_block = controlnet_states[i]
+                if (controlnet_states is not None) and (i < len(controlnet_states)):
-                controlnet_block_weight = 1.0
+                    controlnet_states_block = controlnet_states[i]
-                if isinstance(controlnet_weights, (list, np.ndarray)) or torch.is_tensor(controlnet_weights):
+                    controlnet_block_weight = 1.0
-                    controlnet_block_weight = controlnet_weights[i]
+                    if isinstance(controlnet_weights, (list, np.ndarray)) or torch.is_tensor(controlnet_weights):
-                elif isinstance(controlnet_weights, (float, int)):
+                        controlnet_block_weight = controlnet_weights[i]
-                    controlnet_block_weight = controlnet_weights
+                        print(controlnet_block_weight)
-                
+                    elif isinstance(controlnet_weights, (float, int)):
-                hidden_states = hidden_states + controlnet_states_block * controlnet_block_weight
+                        controlnet_block_weight = controlnet_weights                    
                    hidden_states = hidden_states + controlnet_states_block * controlnet_block_weight
            if not self.config.use_rotary_positional_embeddings:
                # CogVideoX-2B
--- a/examples/cogvideo_2b_context_schedule_test_01.json
+++ b/examples/cogvideo_2b_context_schedule_test_01.json
@ -1,561 +0,0 @@
 {
  "last_node_id": 34,
  "last_link_id": 61,
  "nodes": [
    {
      "id": 33,
      "type": "GetImageSizeAndCount",
      "pos": {
        "0": 1176,
        "1": 122
      },
      "size": {
        "0": 210,
        "1": 86
      },
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "image",
          "type": "IMAGE",
          "link": 59
        }
      ],
      "outputs": [
        {
          "name": "image",
          "type": "IMAGE",
          "links": [
            60
          ],
          "slot_index": 0,
          "shape": 3
        },
        {
          "name": "720 width",
          "type": "INT",
          "links": null,
          "shape": 3
        },
        {
          "name": "480 height",
          "type": "INT",
          "links": null,
          "shape": 3
        },
        {
          "name": "104 count",
          "type": "INT",
          "links": null,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "GetImageSizeAndCount"
      },
      "widgets_values": []
    },
    {
      "id": 30,
      "type": "CogVideoTextEncode",
      "pos": {
        "0": 500,
        "1": 308
      },
      "size": [
        474.8035864085422,
        211.10369504535595
      ],
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 54
        }
      ],
      "outputs": [
        {
          "name": "conditioning",
          "type": "CONDITIONING",
          "links": [
            55
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoTextEncode"
      },
      "widgets_values": [
        "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature\nacoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters\nthrough the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The\nbackground includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical\nperformance.",
        1,
        true
      ]
    },
    {
      "id": 31,
      "type": "CogVideoTextEncode",
      "pos": {
        "0": 508,
        "1": 576
      },
      "size": {
        "0": 463.01251220703125,
        "1": 124
      },
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 56
        }
      ],
      "outputs": [
        {
          "name": "conditioning",
          "type": "CONDITIONING",
          "links": [
            57
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoTextEncode"
      },
      "widgets_values": [
        "",
        1,
        true
      ]
    },
    {
      "id": 20,
      "type": "CLIPLoader",
      "pos": {
        "0": -37,
        "1": 443
      },
      "size": {
        "0": 451.30548095703125,
        "1": 82
      },
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [
            54,
            56
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CLIPLoader"
      },
      "widgets_values": [
        "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
        "sd3"
      ]
    },
    {
      "id": 11,
      "type": "CogVideoDecode",
      "pos": {
        "0": 1045,
        "1": 776
      },
      "size": {
        "0": 295.70111083984375,
        "1": 198
      },
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [
        {
          "name": "pipeline",
          "type": "COGVIDEOPIPE",
          "link": 37
        },
        {
          "name": "samples",
          "type": "LATENT",
          "link": 38
        }
      ],
      "outputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "links": [
            59
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoDecode"
      },
      "widgets_values": [
        true,
        96,
        96,
        0.083,
        0.083,
        true
      ]
    },
    {
      "id": 1,
      "type": "DownloadAndLoadCogVideoModel",
      "pos": {
        "0": 652,
        "1": 43
      },
      "size": {
        "0": 315,
        "1": 194
      },
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [
        {
          "name": "pab_config",
          "type": "PAB_CONFIG",
          "link": null
        },
        {
          "name": "block_edit",
          "type": "TRANSFORMERBLOCKS",
          "link": null
        },
        {
          "name": "lora",
          "type": "COGLORA",
          "link": null
        }
      ],
      "outputs": [
        {
          "name": "cogvideo_pipe",
          "type": "COGVIDEOPIPE",
          "links": [
            36
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "DownloadAndLoadCogVideoModel"
      },
      "widgets_values": [
        "THUDM/CogVideoX-2b",
        "fp16",
        "enabled",
        "disabled",
        false
      ]
    },
    {
      "id": 32,
      "type": "VHS_VideoCombine",
      "pos": {
        "0": 1439,
        "1": 122
      },
      "size": [
        563.3333740234375,
        686.2222493489583
      ],
      "flags": {},
      "order": 8,
      "mode": 0,
      "inputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "link": 60,
          "slot_index": 0
        },
        {
          "name": "audio",
          "type": "VHS_AUDIO",
          "link": null
        },
        {
          "name": "meta_batch",
          "type": "VHS_BatchManager",
          "link": null
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": null
        }
      ],
      "outputs": [
        {
          "name": "Filenames",
          "type": "VHS_FILENAMES",
          "links": null,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "VHS_VideoCombine"
      },
      "widgets_values": {
        "frame_rate": 8,
        "loop_count": 0,
        "filename_prefix": "CogVideo2B_long",
        "format": "video/h264-mp4",
        "pix_fmt": "yuv420p",
        "crf": 19,
        "save_metadata": true,
        "pingpong": false,
        "save_output": false,
        "videopreview": {
          "hidden": false,
          "paused": false,
          "params": {
            "filename": "CogVideo2B_long_00005.mp4",
            "subfolder": "",
            "type": "temp",
            "format": "video/h264-mp4",
            "frame_rate": 8
          }
        }
      }
    },
    {
      "id": 34,
      "type": "CogVideoContextOptions",
      "pos": {
        "0": 1053,
        "1": -84
      },
      "size": {
        "0": 315,
        "1": 154
      },
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "context_options",
          "type": "COGCONTEXT",
          "links": [
            61
          ],
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoContextOptions"
      },
      "widgets_values": [
        "uniform_standard",
        52,
        4,
        8,
        true
      ]
    },
    {
      "id": 22,
      "type": "CogVideoSampler",
      "pos": {
        "0": 1041,
        "1": 342
      },
      "size": {
        "0": 315,
        "1": 382
      },
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {
          "name": "pipeline",
          "type": "COGVIDEOPIPE",
          "link": 36
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 55,
          "slot_index": 1
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 57
        },
        {
          "name": "samples",
          "type": "LATENT",
          "link": null
        },
        {
          "name": "image_cond_latents",
          "type": "LATENT",
          "link": null
        },
        {
          "name": "context_options",
          "type": "COGCONTEXT",
          "link": 61
        }
      ],
      "outputs": [
        {
          "name": "cogvideo_pipe",
          "type": "COGVIDEOPIPE",
          "links": [
            37
          ],
          "shape": 3
        },
        {
          "name": "samples",
          "type": "LATENT",
          "links": [
            38
          ],
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoSampler"
      },
      "widgets_values": [
        480,
        720,
        104,
        32,
        6,
        42,
        "fixed",
        "CogVideoXDDIM",
        1
      ]
    }
  ],
  "links": [
    [
      36,
      1,
      0,
      22,
      0,
      "COGVIDEOPIPE"
    ],
    [
      37,
      22,
      0,
      11,
      0,
      "COGVIDEOPIPE"
    ],
    [
      38,
      22,
      1,
      11,
      1,
      "LATENT"
    ],
    [
      54,
      20,
      0,
      30,
      0,
      "CLIP"
    ],
    [
      55,
      30,
      0,
      22,
      1,
      "CONDITIONING"
    ],
    [
      56,
      20,
      0,
      31,
      0,
      "CLIP"
    ],
    [
      57,
      31,
      0,
      22,
      2,
      "CONDITIONING"
    ],
    [
      59,
      11,
      0,
      33,
      0,
      "IMAGE"
    ],
    [
      60,
      33,
      0,
      32,
      0,
      "IMAGE"
    ],
    [
      61,
      34,
      0,
      22,
      5,
      "COGCONTEXT"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
      "scale": 0.8390545288825444,
      "offset": [
        -14.198557467892236,
        144.90015432747748
      ]
    }
  },
  "version": 0.4
 }
--- a/examples/cogvideox_2b_controlnet_example_01.json
+++ b/examples/cogvideox_2b_controlnet_example_01.json
--- a/examples/cogvideox_I2V_example_01.json
+++ b/examples/cogvideox_I2V_example_01.json
@ -1,42 +1,7 @@
 {
-  "last_node_id": 58,
+  "last_node_id": 63,
-  "last_link_id": 129,
+  "last_link_id": 149,
  "nodes": [
    {
      "id": 20,
      "type": "CLIPLoader",
      "pos": {
        "0": -26,
        "1": 400
      },
      "size": {
        "0": 451.30548095703125,
        "1": 82
      },
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [
            54,
            56
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CLIPLoader"
      },
      "widgets_values": [
        "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
        "sd3"
      ]
    },
    {
      "id": 31,
      "type": "CogVideoTextEncode",
@ -46,16 +11,16 @@
      },
      "size": {
        "0": 463.01251220703125,
-        "1": 124
+        "1": 144
      },
      "flags": {},
-      "order": 4,
+      "order": 6,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
-          "link": 56
+          "link": 149
        }
      ],
      "outputs": [
@ -63,10 +28,15 @@
          "name": "conditioning",
          "type": "CONDITIONING",
          "links": [
-            123
+            146
          ],
          "slot_index": 0,
          "shape": 3
        },
        {
          "name": "clip",
          "type": "CLIP",
          "links": null
        }
      ],
      "properties": {
@ -78,6 +48,208 @@
        true
      ]
    },
    {
      "id": 63,
      "type": "CogVideoSampler",
      "pos": {
        "0": 1142,
        "1": 74
      },
      "size": [
        330,
        574
      ],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "COGVIDEOMODEL",
          "link": 144
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 145
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 146
        },
        {
          "name": "samples",
          "type": "LATENT",
          "link": null,
          "shape": 7
        },
        {
          "name": "image_cond_latents",
          "type": "LATENT",
          "link": 147,
          "shape": 7
        },
        {
          "name": "context_options",
          "type": "COGCONTEXT",
          "link": null,
          "shape": 7
        },
        {
          "name": "controlnet",
          "type": "COGVIDECONTROLNET",
          "link": null,
          "shape": 7
        },
        {
          "name": "tora_trajectory",
          "type": "TORAFEATURES",
          "link": null,
          "shape": 7
        },
        {
          "name": "fastercache",
          "type": "FASTERCACHEARGS",
          "link": null,
          "shape": 7
        }
      ],
      "outputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "links": [
            148
          ]
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoSampler"
      },
      "widgets_values": [
        49,
        25,
        6,
        0,
        "fixed",
        "CogVideoXDDIM",
        1
      ]
    },
    {
      "id": 62,
      "type": "CogVideoImageEncode",
      "pos": {
        "0": 1149,
        "1": 711
      },
      "size": {
        "0": 315,
        "1": 122
      },
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {
          "name": "vae",
          "type": "VAE",
          "link": 141
        },
        {
          "name": "start_image",
          "type": "IMAGE",
          "link": 142
        },
        {
          "name": "end_image",
          "type": "IMAGE",
          "link": null,
          "shape": 7
        }
      ],
      "outputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "links": [
            147
          ]
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoImageEncode"
      },
      "widgets_values": [
        false,
        0
      ]
    },
    {
      "id": 59,
      "type": "DownloadAndLoadCogVideoModel",
      "pos": {
        "0": 622,
        "1": -25
      },
      "size": {
        "0": 315,
        "1": 218
      },
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [
        {
          "name": "block_edit",
          "type": "TRANSFORMERBLOCKS",
          "link": null,
          "shape": 7
        },
        {
          "name": "lora",
          "type": "COGLORA",
          "link": null,
          "shape": 7
        },
        {
          "name": "compile_args",
          "type": "COMPILEARGS",
          "link": null,
          "shape": 7
        }
      ],
      "outputs": [
        {
          "name": "model",
          "type": "COGVIDEOMODEL",
          "links": [
            144
          ]
        },
        {
          "name": "vae",
          "type": "VAE",
          "links": [
            132,
            141
          ],
          "slot_index": 1
        }
      ],
      "properties": {
        "Node name for S&R": "DownloadAndLoadCogVideoModel"
      },
      "widgets_values": [
        "THUDM/CogVideoX-5b-I2V",
        "bf16",
        "disabled",
        false,
        "sdpa",
        "main_device"
      ]
    },
    {
      "id": 30,
      "type": "CogVideoTextEncode",
@ -90,7 +262,7 @@
        "1": 168.08047485351562
      },
      "flags": {},
-      "order": 3,
+      "order": 4,
      "mode": 0,
      "inputs": [
        {
@ -104,10 +276,18 @@
          "name": "conditioning",
          "type": "CONDITIONING",
          "links": [
-            122
+            145
          ],
          "slot_index": 0,
          "shape": 3
        },
        {
          "name": "clip",
          "type": "CLIP",
          "links": [
            149
          ],
          "slot_index": 1
        }
      ],
      "properties": {
@ -116,22 +296,22 @@
      "widgets_values": [
        "a majestic stag is grazing in an enhanced forest, basking in the setting sun filtered by the trees",
        1,
-        true
+        false
      ]
    },
    {
      "id": 37,
      "type": "ImageResizeKJ",
      "pos": {
-        "0": 809,
+        "0": 784,
-        "1": 684
+        "1": 731
      },
      "size": {
        "0": 315,
        "1": 266
      },
      "flags": {},
-      "order": 5,
+      "order": 3,
      "mode": 0,
      "inputs": [
        {
@ -142,7 +322,8 @@
        {
          "name": "get_image_size",
          "type": "IMAGE",
-          "link": null
+          "link": null,
          "shape": 7
        },
        {
          "name": "width_input",
@ -166,7 +347,7 @@
          "name": "IMAGE",
          "type": "IMAGE",
          "links": [
-            125
+            142
          ],
          "slot_index": 0,
          "shape": 3
@ -199,64 +380,88 @@
      ]
    },
    {
-      "id": 58,
+      "id": 36,
-      "type": "CogVideoImageEncode",
+      "type": "LoadImage",
      "pos": {
-        "0": 1156,
+        "0": 335,
-        "1": 650
+        "1": 731
      },
      "size": {
-        "0": 315,
+        "0": 402.06353759765625,
-        "1": 122
+        "1": 396.6225891113281
      },
      "flags": {},
-      "order": 6,
+      "order": 1,
      "mode": 0,
-      "inputs": [
+      "inputs": [],
        {
          "name": "pipeline",
          "type": "COGVIDEOPIPE",
          "link": 124
        },
        {
          "name": "image",
          "type": "IMAGE",
          "link": 125
        },
        {
          "name": "mask",
          "type": "MASK",
          "link": null
        }
      ],
      "outputs": [
        {
-          "name": "samples",
+          "name": "IMAGE",
-          "type": "LATENT",
+          "type": "IMAGE",
          "links": [
-            129
+            71
          ],
          "slot_index": 0,
          "shape": 3
        },
        {
          "name": "MASK",
          "type": "MASK",
          "links": null,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "LoadImage"
      },
      "widgets_values": [
        "sd3stag.png",
        "image"
      ]
    },
    {
      "id": 20,
      "type": "CLIPLoader",
      "pos": {
        "0": -2,
        "1": 304
      },
      "size": {
        "0": 451.30548095703125,
        "1": 82
      },
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [
            54
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
-        "Node name for S&R": "CogVideoImageEncode"
+        "Node name for S&R": "CLIPLoader"
      },
      "widgets_values": [
-        16,
+        "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
-        true
+        "sd3"
      ]
    },
    {
-      "id": 56,
+      "id": 60,
      "type": "CogVideoDecode",
      "pos": {
-        "0": 1581,
+        "0": 1523,
-        "1": 148
+        "1": -6
      },
      "size": {
-        "0": 300.396484375,
+        "0": 315,
        "1": 198
      },
      "flags": {},
@ -264,14 +469,14 @@
      "mode": 0,
      "inputs": [
        {
-          "name": "pipeline",
+          "name": "vae",
-          "type": "COGVIDEOPIPE",
+          "type": "VAE",
-          "link": 128
+          "link": 132
        },
        {
          "name": "samples",
          "type": "LATENT",
-          "link": 127
+          "link": 148
        }
      ],
      "outputs": [
@ -279,17 +484,15 @@
          "name": "images",
          "type": "IMAGE",
          "links": [
-            118
+            134
-          ],
+          ]
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoDecode"
      },
      "widgets_values": [
-        false,
+        true,
        240,
        360,
        0.2,
@ -301,8 +504,8 @@
      "id": 44,
      "type": "VHS_VideoCombine",
      "pos": {
-        "0": 1927,
+        "0": 1884,
-        "1": 146
+        "1": -6
      },
      "size": [
        605.3909912109375,
@ -315,22 +518,25 @@
        {
          "name": "images",
          "type": "IMAGE",
-          "link": 118
+          "link": 134
        },
        {
          "name": "audio",
          "type": "AUDIO",
-          "link": null
+          "link": null,
          "shape": 7
        },
        {
          "name": "meta_batch",
          "type": "VHS_BatchManager",
-          "link": null
+          "link": null,
          "shape": 7
        },
        {
          "name": "vae",
          "type": "VAE",
-          "link": null
+          "link": null,
          "shape": 7
        }
      ],
      "outputs": [
@ -367,180 +573,6 @@
          "muted": false
        }
      }
    },
    {
      "id": 36,
      "type": "LoadImage",
      "pos": {
        "0": 365,
        "1": 685
      },
      "size": {
        "0": 402.06353759765625,
        "1": 396.6225891113281
      },
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "IMAGE",
          "type": "IMAGE",
          "links": [
            71
          ],
          "slot_index": 0,
          "shape": 3
        },
        {
          "name": "MASK",
          "type": "MASK",
          "links": null,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "LoadImage"
      },
      "widgets_values": [
        "sd3stag.png",
        "image"
      ]
    },
    {
      "id": 57,
      "type": "CogVideoSampler",
      "pos": {
        "0": 1138,
        "1": 150
      },
      "size": [
        399.878095897654,
        350
      ],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "pipeline",
          "type": "COGVIDEOPIPE",
          "link": 121
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 122
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 123
        },
        {
          "name": "samples",
          "type": "LATENT",
          "link": null
        },
        {
          "name": "image_cond_latents",
          "type": "LATENT",
          "link": 129
        },
        {
          "name": "context_options",
          "type": "COGCONTEXT",
          "link": null
        }
      ],
      "outputs": [
        {
          "name": "cogvideo_pipe",
          "type": "COGVIDEOPIPE",
          "links": [
            128
          ],
          "slot_index": 0,
          "shape": 3
        },
        {
          "name": "samples",
          "type": "LATENT",
          "links": [
            127
          ],
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoSampler"
      },
      "widgets_values": [
        480,
        720,
        49,
        20,
        6,
        65334758276105,
        "fixed",
        "CogVideoXDPMScheduler",
        1
      ]
    },
    {
      "id": 1,
      "type": "DownloadAndLoadCogVideoModel",
      "pos": {
        "0": 633,
        "1": 44
      },
      "size": {
        "0": 337.8885192871094,
        "1": 194
      },
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [
        {
          "name": "pab_config",
          "type": "PAB_CONFIG",
          "link": null
        },
        {
          "name": "block_edit",
          "type": "TRANSFORMERBLOCKS",
          "link": null
        },
        {
          "name": "lora",
          "type": "COGLORA",
          "link": null
        }
      ],
      "outputs": [
        {
          "name": "cogvideo_pipe",
          "type": "COGVIDEOPIPE",
          "links": [
            121,
            124
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "DownloadAndLoadCogVideoModel"
      },
      "widgets_values": [
        "THUDM/CogVideoX-5b-I2V",
        "bf16",
        "disabled",
        "disabled",
        false
      ]
    }
  ],
  "links": [
@ -552,14 +584,6 @@
      0,
      "CLIP"
    ],
    [
      56,
      20,
      0,
      31,
      0,
      "CLIP"
    ],
    [
      71,
      36,
@ -569,86 +593,94 @@
      "IMAGE"
    ],
    [
-      118,
+      132,
-      56,
+      59,
      1,
      60,
      0,
      "VAE"
    ],
    [
      134,
      60,
      0,
      44,
      0,
      "IMAGE"
    ],
    [
-      121,
+      141,
      59,
      1,
      62,
      0,
-      57,
+      "VAE"
      0,
      "COGVIDEOPIPE"
    ],
    [
-      122,
+      142,
      30,
      0,
      57,
      1,
      "CONDITIONING"
    ],
    [
      123,
      31,
      0,
      57,
      2,
      "CONDITIONING"
    ],
    [
      124,
      1,
      0,
      58,
      0,
      "COGVIDEOPIPE"
    ],
    [
      125,
      37,
      0,
-      58,
+      62,
      1,
      "IMAGE"
    ],
    [
-      127,
+      144,
-      57,
+      59,
-      1,
+      0,
-      56,
+      63,
-      1,
+      0,
-      "LATENT"
+      "COGVIDEOMODEL"
    ],
    [
-      128,
+      145,
-      57,
+      30,
      0,
-      56,
+      63,
-      0,
+      1,
-      "COGVIDEOPIPE"
+      "CONDITIONING"
    ],
    [
-      129,
+      146,
-      58,
+      31,
      0,
-      57,
+      63,
      2,
      "CONDITIONING"
    ],
    [
      147,
      62,
      0,
      63,
      4,
      "LATENT"
    ],
    [
      148,
      63,
      0,
      60,
      1,
      "LATENT"
    ],
    [
      149,
      30,
      1,
      31,
      0,
      "CLIP"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
-      "scale": 0.6934334949442514,
+      "scale": 0.7627768444387059,
      "offset": [
-        -24.154349208343916,
+        648.7113591814891,
-        155.20539218330134
+        185.9907078691075
      ]
    }
  },
--- a/examples/cogvideox_1_0_5b_I2V_Tora_02.json
+++ b/examples/cogvideox_1_0_5b_I2V_Tora_02.json
--- a/examples/cogvideox_1_0_5b_T2V_02.json
+++ b/examples/cogvideox_1_0_5b_T2V_02.json
@ -1,48 +1,7 @@
 {
-  "last_node_id": 34,
+  "last_node_id": 37,
-  "last_link_id": 64,
+  "last_link_id": 72,
  "nodes": [
    {
      "id": 31,
      "type": "CogVideoTextEncode",
      "pos": {
        "0": 503,
        "1": 521
      },
      "size": {
        "0": 463.01251220703125,
        "1": 124
      },
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 56
        }
      ],
      "outputs": [
        {
          "name": "conditioning",
          "type": "CONDITIONING",
          "links": [
            62
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoTextEncode"
      },
      "widgets_values": [
        "",
        1,
        true
      ]
    },
    {
      "id": 30,
      "type": "CogVideoTextEncode",
@ -50,12 +9,12 @@
        "0": 500,
        "1": 308
      },
-      "size": {
+      "size": [
-        "0": 471.90142822265625,
+        470.99399664051055,
-        "1": 168.08047485351562
+        237.5088638951354
-      },
+      ],
      "flags": {},
-      "order": 2,
+      "order": 3,
      "mode": 0,
      "inputs": [
        {
@ -69,10 +28,18 @@
          "name": "conditioning",
          "type": "CONDITIONING",
          "links": [
-            61
+            67
          ],
          "slot_index": 0,
          "shape": 3
        },
        {
          "name": "clip",
          "type": "CLIP",
          "links": [
            65
          ],
          "slot_index": 1
        }
      ],
      "properties": {
@ -81,192 +48,79 @@
      "widgets_values": [
        "A golden retriever, sporting sleek black sunglasses, with its lengthy fur flowing in the breeze, sprints playfully across a rooftop terrace, recently refreshed by a light rain. The scene unfolds from a distance, the dog's energetic bounds growing larger as it approaches the camera, its tail wagging with unrestrained joy, while droplets of water glisten on the concrete behind it. The overcast sky provides a dramatic backdrop, emphasizing the vibrant golden coat of the canine as it dashes towards the viewer.\n\n",
        1,
-        true
+        false
      ]
    },
    {
-      "id": 33,
+      "id": 31,
-      "type": "VHS_VideoCombine",
+      "type": "CogVideoTextEncode",
      "pos": {
-        "0": 1441,
+        "0": 503,
-        "1": 129
+        "1": 602
      },
      "size": [
-        778.7022705078125,
+        464.4980515341475,
-        310
+        169.87479027400514
      ],
      "flags": {},
-      "order": 6,
+      "order": 4,
      "mode": 0,
      "inputs": [
        {
-          "name": "images",
+          "name": "clip",
          "type": "IMAGE",
          "link": 59
        },
        {
          "name": "audio",
          "type": "AUDIO",
          "link": null
        },
        {
          "name": "meta_batch",
          "type": "VHS_BatchManager",
          "link": null
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": null
        }
      ],
      "outputs": [
        {
          "name": "Filenames",
          "type": "VHS_FILENAMES",
          "links": null,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "VHS_VideoCombine"
      },
      "widgets_values": {
        "frame_rate": 8,
        "loop_count": 0,
        "filename_prefix": "CogVideoX5B",
        "format": "video/h264-mp4",
        "pix_fmt": "yuv420p",
        "crf": 19,
        "save_metadata": true,
        "pingpong": false,
        "save_output": false,
        "videopreview": {
          "hidden": false,
          "paused": false,
          "params": {
            "filename": "CogVideoX5B_00009.mp4",
            "subfolder": "",
            "type": "temp",
            "format": "video/h264-mp4",
            "frame_rate": 8
          },
          "muted": false
        }
      }
    },
    {
      "id": 20,
      "type": "CLIPLoader",
      "pos": {
        "0": -26,
        "1": 400
      },
      "size": {
        "0": 451.30548095703125,
        "1": 82
      },
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "CLIP",
          "type": "CLIP",
-          "links": [
+          "link": 65
            54,
            56
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CLIPLoader"
      },
      "widgets_values": [
        "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
        "sd3"
      ]
    },
    {
      "id": 1,
      "type": "DownloadAndLoadCogVideoModel",
      "pos": {
        "0": 642,
        "1": 90
      },
      "size": {
        "0": 315,
        "1": 194
      },
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [
        {
          "name": "pab_config",
          "type": "PAB_CONFIG",
          "link": null
        },
        {
          "name": "block_edit",
          "type": "TRANSFORMERBLOCKS",
          "link": null
        },
        {
          "name": "lora",
          "type": "COGLORA",
          "link": null
        }
      ],
      "outputs": [
        {
-          "name": "cogvideo_pipe",
+          "name": "conditioning",
-          "type": "COGVIDEOPIPE",
+          "type": "CONDITIONING",
          "links": [
-            60
+            68
          ],
          "slot_index": 0,
          "shape": 3
        },
        {
          "name": "clip",
          "type": "CLIP",
          "links": null
        }
      ],
      "properties": {
-        "Node name for S&R": "DownloadAndLoadCogVideoModel"
+        "Node name for S&R": "CogVideoTextEncode"
      },
      "widgets_values": [
-        "THUDM/CogVideoX-5b",
+        "",
-        "bf16",
+        1,
-        "disabled",
+        true
        "disabled",
        false
      ]
    },
    {
      "id": 11,
      "type": "CogVideoDecode",
      "pos": {
-        "0": 1051,
+        "0": 1416,
-        "1": 748
+        "1": 40
      },
      "size": {
        "0": 300.396484375,
        "1": 198
      },
      "flags": {},
-      "order": 5,
+      "order": 6,
      "mode": 0,
      "inputs": [
        {
-          "name": "pipeline",
+          "name": "vae",
-          "type": "COGVIDEOPIPE",
+          "type": "VAE",
-          "link": 63
+          "link": 71
        },
        {
          "name": "samples",
          "type": "LATENT",
-          "link": 64
+          "link": 69
        }
      ],
      "outputs": [
@ -293,83 +147,297 @@
      ]
    },
    {
-      "id": 34,
+      "id": 36,
-      "type": "CogVideoSampler",
+      "type": "DownloadAndLoadCogVideoModel",
      "pos": {
-        "0": 1041,
+        "0": 645,
-        "1": 342
+        "1": 17
      },
      "size": {
-        "0": 315.8404846191406,
+        "0": 315,
-        "1": 358
+        "1": 218
      },
      "flags": {},
-      "order": 4,
+      "order": 0,
      "mode": 0,
      "inputs": [
        {
-          "name": "pipeline",
+          "name": "block_edit",
-          "type": "COGVIDEOPIPE",
+          "type": "TRANSFORMERBLOCKS",
-          "link": 60
+          "link": null,
          "shape": 7
        },
        {
-          "name": "positive",
+          "name": "lora",
-          "type": "CONDITIONING",
+          "type": "COGLORA",
-          "link": 61
+          "link": null,
          "shape": 7
        },
        {
-          "name": "negative",
+          "name": "compile_args",
-          "type": "CONDITIONING",
+          "type": "COMPILEARGS",
-          "link": 62
+          "link": null,
-        },
+          "shape": 7
        {
          "name": "samples",
          "type": "LATENT",
          "link": null
        },
        {
          "name": "image_cond_latents",
          "type": "LATENT",
          "link": null
        },
        {
          "name": "context_options",
          "type": "COGCONTEXT",
          "link": null
        }
      ],
      "outputs": [
        {
-          "name": "cogvideo_pipe",
+          "name": "model",
-          "type": "COGVIDEOPIPE",
+          "type": "COGVIDEOMODEL",
          "links": [
-            63
+            70
          ]
        },
        {
          "name": "vae",
          "type": "VAE",
          "links": [
            71
          ],
          "slot_index": 1
        }
      ],
      "properties": {
        "Node name for S&R": "DownloadAndLoadCogVideoModel"
      },
      "widgets_values": [
        "THUDM/CogVideoX-5b",
        "bf16",
        "disabled",
        false,
        "sdpa",
        "main_device"
      ]
    },
    {
      "id": 20,
      "type": "CLIPLoader",
      "pos": {
        "0": 5,
        "1": 308
      },
      "size": {
        "0": 451.30548095703125,
        "1": 82
      },
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [
            54
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CLIPLoader"
      },
      "widgets_values": [
        "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
        "sd3"
      ]
    },
    {
      "id": 37,
      "type": "EmptyLatentImage",
      "pos": {
        "0": 643,
        "1": 827
      },
      "size": {
        "0": 315,
        "1": 106
      },
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            72
          ]
        }
      ],
      "properties": {
        "Node name for S&R": "EmptyLatentImage"
      },
      "widgets_values": [
        720,
        480,
        1
      ]
    },
    {
      "id": 35,
      "type": "CogVideoSampler",
      "pos": {
        "0": 1042,
        "1": 291
      },
      "size": [
        330,
        574
      ],
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "COGVIDEOMODEL",
          "link": 70
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 67
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 68
        },
        {
          "name": "samples",
          "type": "LATENT",
          "link": 72,
          "shape": 7
        },
        {
          "name": "image_cond_latents",
          "type": "LATENT",
          "link": null,
          "shape": 7
        },
        {
          "name": "context_options",
          "type": "COGCONTEXT",
          "link": null,
          "shape": 7
        },
        {
          "name": "controlnet",
          "type": "COGVIDECONTROLNET",
          "link": null,
          "shape": 7
        },
        {
          "name": "tora_trajectory",
          "type": "TORAFEATURES",
          "link": null,
          "shape": 7
        },
        {
          "name": "fastercache",
          "type": "FASTERCACHEARGS",
          "link": null,
          "shape": 7
        }
      ],
      "outputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "links": [
-            64
+            69
-          ],
+          ]
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoSampler"
      },
      "widgets_values": [
        480,
        720,
        49,
        50,
        6,
-        806286757407563,
+        0,
        "fixed",
-        "DPM++",
+        "CogVideoXDDIM",
        1
      ]
    },
    {
      "id": 33,
      "type": "VHS_VideoCombine",
      "pos": {
        "0": 1767,
        "1": 39
      },
      "size": [
        778.7022705078125,
        829.801513671875
      ],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "link": 59
        },
        {
          "name": "audio",
          "type": "AUDIO",
          "link": null,
          "shape": 7
        },
        {
          "name": "meta_batch",
          "type": "VHS_BatchManager",
          "link": null,
          "shape": 7
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": null,
          "shape": 7
        }
      ],
      "outputs": [
        {
          "name": "Filenames",
          "type": "VHS_FILENAMES",
          "links": null,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "VHS_VideoCombine"
      },
      "widgets_values": {
        "frame_rate": 8,
        "loop_count": 0,
        "filename_prefix": "CogVideoX5B-T2V",
        "format": "video/h264-mp4",
        "pix_fmt": "yuv420p",
        "crf": 19,
        "save_metadata": true,
        "pingpong": false,
        "save_output": false,
        "videopreview": {
          "hidden": false,
          "paused": false,
          "params": {
            "filename": "CogVideoX5B_00001.mp4",
            "subfolder": "",
            "type": "temp",
            "format": "video/h264-mp4",
            "frame_rate": 8
          },
          "muted": false
        }
      }
    }
  ],
  "links": [
@ -381,14 +449,6 @@
      0,
      "CLIP"
    ],
    [
      56,
      20,
      0,
      31,
      0,
      "CLIP"
    ],
    [
      59,
      11,
@ -398,43 +458,59 @@
      "IMAGE"
    ],
    [
-      60,
+      65,
      30,
      1,
      31,
      0,
-      34,
+      "CLIP"
      0,
      "COGVIDEOPIPE"
    ],
    [
-      61,
+      67,
      30,
      0,
-      34,
+      35,
      1,
      "CONDITIONING"
    ],
    [
-      62,
+      68,
      31,
      0,
-      34,
+      35,
      2,
      "CONDITIONING"
    ],
    [
-      63,
+      69,
-      34,
+      35,
      0,
      11,
-      0,
+      1,
-      "COGVIDEOPIPE"
+      "LATENT"
    ],
    [
-      64,
+      70,
-      34,
+      36,
      0,
      35,
      0,
      "COGVIDEOMODEL"
    ],
    [
      71,
      36,
      1,
      11,
-      1,
+      0,
      "VAE"
    ],
    [
      72,
      37,
      0,
      35,
      3,
      "LATENT"
    ]
  ],
@ -442,10 +518,10 @@
  "config": {},
  "extra": {
    "ds": {
-      "scale": 0.6934334949442514,
+      "scale": 0.7627768444387061,
      "offset": [
-        -24.154349208343916,
+        734.1791945221892,
-        155.20539218330134
+        237.29437844909364
      ]
    }
  },
--- a/examples/cogvideox_interpolation_example_01.json
+++ b/examples/cogvideox_interpolation_example_01.json
--- a/examples/cogvideo_5b_vid2vid_example_01.json
+++ b/examples/cogvideo_5b_vid2vid_example_01.json
--- a/examples/cogvideox_5b_Tora_I2V_testing_01.json
+++ b/examples/cogvideox_5b_Tora_I2V_testing_01.json
--- a/examples/cogvideox_5b_tora_trajectory_example_01.json
+++ b/examples/cogvideox_5b_tora_trajectory_example_01.json
--- a/examples/cogvidex_fun_i2v_example_02.json
+++ b/examples/cogvidex_fun_i2v_example_02.json
--- a/examples/cogvideox_Fun_I2V_Tora.json
+++ b/examples/cogvideox_Fun_I2V_Tora.json
--- a/examples/cogvideox_fun_pose_example_01.json
+++ b/examples/cogvideox_fun_pose_example_01.json
--- a/examples/cogvideox_fun_img2vid_tora_01.json
+++ b/examples/cogvideox_fun_img2vid_tora_01.json
--- a/examples/cogvidex_fun_5b_GGUF_10GB_VRAM_example_02.json
+++ b/examples/cogvidex_fun_5b_GGUF_10GB_VRAM_example_02.json
@ -1,622 +0,0 @@
 {
  "last_node_id": 51,
  "last_link_id": 114,
  "nodes": [
    {
      "id": 20,
      "type": "CLIPLoader",
      "pos": {
        "0": -26,
        "1": 400
      },
      "size": {
        "0": 451.30548095703125,
        "1": 82
      },
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [
            54
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CLIPLoader"
      },
      "widgets_values": [
        "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
        "sd3"
      ]
    },
    {
      "id": 31,
      "type": "CogVideoTextEncode",
      "pos": {
        "0": 497,
        "1": 520
      },
      "size": {
        "0": 463.01251220703125,
        "1": 144
      },
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 108
        }
      ],
      "outputs": [
        {
          "name": "conditioning",
          "type": "CONDITIONING",
          "links": [
            111
          ],
          "slot_index": 0,
          "shape": 3
        },
        {
          "name": "clip",
          "type": "CLIP",
          "links": null
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoTextEncode"
      },
      "widgets_values": [
        "The video is not of a high quality, it has a low resolution. Watermark present in each frame. Strange motion trajectory. ",
        1,
        true
      ]
    },
    {
      "id": 44,
      "type": "VHS_VideoCombine",
      "pos": {
        "0": 1842,
        "1": 345
      },
      "size": [
        855.81494140625,
        881.2099609375
      ],
      "flags": {},
      "order": 8,
      "mode": 0,
      "inputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "link": 97
        },
        {
          "name": "audio",
          "type": "AUDIO",
          "link": null,
          "shape": 7
        },
        {
          "name": "meta_batch",
          "type": "VHS_BatchManager",
          "link": null,
          "shape": 7
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": null,
          "shape": 7
        }
      ],
      "outputs": [
        {
          "name": "Filenames",
          "type": "VHS_FILENAMES",
          "links": null,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "VHS_VideoCombine"
      },
      "widgets_values": {
        "frame_rate": 16,
        "loop_count": 0,
        "filename_prefix": "CogVideoX_Fun",
        "format": "video/h264-mp4",
        "pix_fmt": "yuv420p",
        "crf": 19,
        "save_metadata": true,
        "pingpong": false,
        "save_output": false,
        "videopreview": {
          "hidden": false,
          "paused": false,
          "params": {
            "filename": "CogVideoX_Fun_00003.mp4",
            "subfolder": "",
            "type": "temp",
            "format": "video/h264-mp4",
            "frame_rate": 16
          },
          "muted": false
        }
      }
    },
    {
      "id": 36,
      "type": "LoadImage",
      "pos": {
        "0": 227,
        "1": 700
      },
      "size": {
        "0": 391.3421325683594,
        "1": 456.8497009277344
      },
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "IMAGE",
          "type": "IMAGE",
          "links": [
            71
          ],
          "slot_index": 0,
          "shape": 3
        },
        {
          "name": "MASK",
          "type": "MASK",
          "links": null,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "LoadImage"
      },
      "widgets_values": [
        "sd3stag.png",
        "image"
      ]
    },
    {
      "id": 37,
      "type": "ImageResizeKJ",
      "pos": {
        "0": 688,
        "1": 708
      },
      "size": {
        "0": 315,
        "1": 266
      },
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        {
          "name": "image",
          "type": "IMAGE",
          "link": 71
        },
        {
          "name": "get_image_size",
          "type": "IMAGE",
          "link": null,
          "shape": 7
        },
        {
          "name": "width_input",
          "type": "INT",
          "link": null,
          "widget": {
            "name": "width_input"
          }
        },
        {
          "name": "height_input",
          "type": "INT",
          "link": null,
          "widget": {
            "name": "height_input"
          }
        }
      ],
      "outputs": [
        {
          "name": "IMAGE",
          "type": "IMAGE",
          "links": [
            112
          ],
          "slot_index": 0,
          "shape": 3
        },
        {
          "name": "width",
          "type": "INT",
          "links": null,
          "shape": 3
        },
        {
          "name": "height",
          "type": "INT",
          "links": null,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "ImageResizeKJ"
      },
      "widgets_values": [
        720,
        480,
        "lanczos",
        true,
        16,
        0,
        0,
        "disabled"
      ]
    },
    {
      "id": 11,
      "type": "CogVideoDecode",
      "pos": {
        "0": 1477,
        "1": 344
      },
      "size": {
        "0": 300.396484375,
        "1": 198
      },
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "pipeline",
          "type": "COGVIDEOPIPE",
          "link": 113
        },
        {
          "name": "samples",
          "type": "LATENT",
          "link": 114
        }
      ],
      "outputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "links": [
            97
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoDecode"
      },
      "widgets_values": [
        true,
        240,
        360,
        0.2,
        0.2,
        true
      ]
    },
    {
      "id": 30,
      "type": "CogVideoTextEncode",
      "pos": {
        "0": 493,
        "1": 303
      },
      "size": {
        "0": 471.90142822265625,
        "1": 168.08047485351562
      },
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 54
        }
      ],
      "outputs": [
        {
          "name": "conditioning",
          "type": "CONDITIONING",
          "links": [
            110
          ],
          "slot_index": 0,
          "shape": 3
        },
        {
          "name": "clip",
          "type": "CLIP",
          "links": [
            108
          ],
          "slot_index": 1
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoTextEncode"
      },
      "widgets_values": [
        "majestic stag grazing in a forest and basking in the setting sun",
        1,
        false
      ]
    },
    {
      "id": 51,
      "type": "CogVideoXFunSampler",
      "pos": {
        "0": 1058,
        "1": 345
      },
      "size": {
        "0": 367.79998779296875,
        "1": 434
      },
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [
        {
          "name": "pipeline",
          "type": "COGVIDEOPIPE",
          "link": 109
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 110
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 111
        },
        {
          "name": "start_img",
          "type": "IMAGE",
          "link": 112,
          "shape": 7
        },
        {
          "name": "end_img",
          "type": "IMAGE",
          "link": null,
          "shape": 7
        },
        {
          "name": "context_options",
          "type": "COGCONTEXT",
          "link": null,
          "shape": 7
        },
        {
          "name": "tora_trajectory",
          "type": "TORAFEATURES",
          "link": null,
          "shape": 7
        },
        {
          "name": "fastercache",
          "type": "FASTERCACHEARGS",
          "link": null,
          "shape": 7
        },
        {
          "name": "vid2vid_images",
          "type": "IMAGE",
          "link": null,
          "shape": 7
        }
      ],
      "outputs": [
        {
          "name": "cogvideo_pipe",
          "type": "COGVIDEOPIPE",
          "links": [
            113
          ]
        },
        {
          "name": "samples",
          "type": "LATENT",
          "links": [
            114
          ]
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoXFunSampler"
      },
      "widgets_values": [
        49,
        720,
        480,
        43,
        "randomize",
        50,
        6,
        "DDIM",
        0.0563,
        1
      ]
    },
    {
      "id": 48,
      "type": "DownloadAndLoadCogVideoGGUFModel",
      "pos": {
        "0": 585,
        "1": 34
      },
      "size": {
        "0": 378,
        "1": 198
      },
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [
        {
          "name": "pab_config",
          "type": "PAB_CONFIG",
          "link": null,
          "shape": 7
        },
        {
          "name": "block_edit",
          "type": "TRANSFORMERBLOCKS",
          "link": null,
          "shape": 7
        }
      ],
      "outputs": [
        {
          "name": "cogvideo_pipe",
          "type": "COGVIDEOPIPE",
          "links": [
            109
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "DownloadAndLoadCogVideoGGUFModel"
      },
      "widgets_values": [
        "CogVideoX_5b_fun_1_1_GGUF_Q4_0.safetensors",
        "bf16",
        false,
        "offload_device",
        false,
        "disabled"
      ]
    }
  ],
  "links": [
    [
      54,
      20,
      0,
      30,
      0,
      "CLIP"
    ],
    [
      71,
      36,
      0,
      37,
      0,
      "IMAGE"
    ],
    [
      97,
      11,
      0,
      44,
      0,
      "IMAGE"
    ],
    [
      108,
      30,
      1,
      31,
      0,
      "CLIP"
    ],
    [
      109,
      48,
      0,
      51,
      0,
      "COGVIDEOPIPE"
    ],
    [
      110,
      30,
      0,
      51,
      1,
      "CONDITIONING"
    ],
    [
      111,
      31,
      0,
      51,
      2,
      "CONDITIONING"
    ],
    [
      112,
      37,
      0,
      51,
      3,
      "IMAGE"
    ],
    [
      113,
      51,
      0,
      11,
      0,
      "COGVIDEOPIPE"
    ],
    [
      114,
      51,
      1,
      11,
      1,
      "LATENT"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
      "scale": 0.7513148009015784,
      "offset": [
        724.7448506313632,
        128.336592104936
      ]
    }
  },
  "version": 0.4
 }
--- a/nodes.py
+++ b/nodes.py
@ -595,14 +595,14 @@ class CogVideoSampler:
    FUNCTION = "process"
    CATEGORY = "CogVideoWrapper"
-    def process(self, pipeline, positive, negative, steps, cfg, seed, scheduler, num_frames, samples=None,
+    def process(self, model, positive, negative, steps, cfg, seed, scheduler, num_frames, samples=None,
                denoise_strength=1.0, image_cond_latents=None, context_options=None, controlnet=None, tora_trajectory=None, fastercache=None):
        mm.soft_empty_cache()
-        model_name = pipeline.get("model_name", "")
+        model_name = model.get("model_name", "")
        supports_image_conds = True if "I2V" in model_name or "interpolation" in model_name.lower() or "fun" in model_name.lower() else False
-        if "fun" in model_name.lower() and image_cond_latents is not None:
+        if "fun" in model_name.lower() and "pose" not in model_name.lower() and image_cond_latents is not None:
            assert image_cond_latents["mask"] is not None, "For fun inpaint models use CogVideoImageEncodeFunInP"
            fun_mask = image_cond_latents["mask"]
        else:
@ -632,11 +632,11 @@ class CogVideoSampler:
        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
-        pipe = pipeline["pipe"]
+        pipe = model["pipe"]
-        dtype = pipeline["dtype"]
+        dtype = model["dtype"]
-        scheduler_config = pipeline["scheduler_config"]
+        scheduler_config = model["scheduler_config"]
-        if not pipeline["cpu_offloading"] and pipeline["manual_offloading"]:
+        if not model["cpu_offloading"] and model["manual_offloading"]:
            pipe.transformer.to(device)
        generator = torch.Generator(device=torch.device("cpu")).manual_seed(seed)
@ -683,10 +683,10 @@ class CogVideoSampler:
        except:
            pass
-        autocastcondition = not pipeline["onediff"] or not dtype == torch.float32
+        autocastcondition = not model["onediff"] or not dtype == torch.float32
        autocast_context = torch.autocast(mm.get_autocast_device(device), dtype=dtype) if autocastcondition else nullcontext()
        with autocast_context:
-            latents = pipeline["pipe"](
+            latents = model["pipe"](
                num_inference_steps=steps,
                height = height,
                width = width,
@ -708,7 +708,7 @@ class CogVideoSampler:
                controlnet=controlnet,
                tora=tora_trajectory if tora_trajectory is not None else None,
            )
-        if not pipeline["cpu_offloading"] and pipeline["manual_offloading"]:
+        if not model["cpu_offloading"] and model["manual_offloading"]:
            pipe.transformer.to(offload_device)
        if fastercache is not None:
@ -763,18 +763,16 @@ class CogVideoDecode:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
-            "samples": ("LATENT", ),
+                    "vae": ("VAE",),
-            "vae": ("VAE", {"default": None}),
+                    "samples": ("LATENT",),
-            "enable_vae_tiling": ("BOOLEAN", {"default": True, "tooltip": "Drastically reduces memory use but may introduce seams"}),
+                    "enable_vae_tiling": ("BOOLEAN", {"default": True, "tooltip": "Drastically reduces memory use but may introduce seams"}),
-            },
+                    "tile_sample_min_height": ("INT", {"default": 240, "min": 16, "max": 2048, "step": 8, "tooltip": "Minimum tile height, default is half the height"}),
-            "optional": {
+                    "tile_sample_min_width": ("INT", {"default": 360, "min": 16, "max": 2048, "step": 8, "tooltip": "Minimum tile width, default is half the width"}),
-            "tile_sample_min_height": ("INT", {"default": 240, "min": 16, "max": 2048, "step": 8, "tooltip": "Minimum tile height, default is half the height"}),
+                    "tile_overlap_factor_height": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.001}),
-            "tile_sample_min_width": ("INT", {"default": 360, "min": 16, "max": 2048, "step": 8, "tooltip": "Minimum tile width, default is half the width"}),
+                    "tile_overlap_factor_width": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.001}),
-            "tile_overlap_factor_height": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.001}),
+                    "auto_tile_size": ("BOOLEAN", {"default": True, "tooltip": "Auto size based on height and width, default is half the size"}),
-            "tile_overlap_factor_width": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.001}),
+                    },            
-            "auto_tile_size": ("BOOLEAN", {"default": True, "tooltip": "Auto size based on height and width, default is half the size"}),
+                }
            }
        }
    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("images",)