Update workflows, fix controlnet

2026-03-16 14:37:12 +08:00 · 2024-11-19 15:23:38 +02:00 · 2024-11-19 15:23:38 +02:00 · 128f89c4d2
commit 128f89c4d2
parent a7646c0d6f
16 changed files with 6627 additions and 7832 deletions
--- a/custom_cogvideox_transformer_3d.py
+++ b/custom_cogvideox_transformer_3d.py
@ -610,29 +610,29 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
        if self.fastercache_counter >= self.fastercache_start_step + 3 and self.fastercache_counter % 5 !=0:
            # 3. Transformer blocks
            for i, block in enumerate(self.transformer_blocks):
-                    hidden_states, encoder_hidden_states = block(
-                        hidden_states=hidden_states[:1],
-                        encoder_hidden_states=encoder_hidden_states[:1],
-                        temb=emb[:1],
-                        image_rotary_emb=image_rotary_emb,
-                        video_flow_feature=video_flow_features[i][:1] if video_flow_features is not None else None,
-                        fuser = self.fuser_list[i] if self.fuser_list is not None else None,
-                        block_use_fastercache = i <= self.fastercache_num_blocks_to_cache,
-                        fastercache_counter = self.fastercache_counter,
-                        fastercache_start_step = self.fastercache_start_step,
-                        fastercache_device = self.fastercache_device,
-                        attention_mode = self.attention_mode
-                    )
+                hidden_states, encoder_hidden_states = block(
+                    hidden_states=hidden_states[:1],
+                    encoder_hidden_states=encoder_hidden_states[:1],
+                    temb=emb[:1],
+                    image_rotary_emb=image_rotary_emb,
+                    video_flow_feature=video_flow_features[i][:1] if video_flow_features is not None else None,
+                    fuser = self.fuser_list[i] if self.fuser_list is not None else None,
+                    block_use_fastercache = i <= self.fastercache_num_blocks_to_cache,
+                    fastercache_counter = self.fastercache_counter,
+                    fastercache_start_step = self.fastercache_start_step,
+                    fastercache_device = self.fastercache_device,
+                    attention_mode = self.attention_mode
+                )

-                    if (controlnet_states is not None) and (i < len(controlnet_states)):
-                        controlnet_states_block = controlnet_states[i]
-                        controlnet_block_weight = 1.0
-                        if isinstance(controlnet_weights, (list, np.ndarray)) or torch.is_tensor(controlnet_weights):
-                            controlnet_block_weight = controlnet_weights[i]
-                        elif isinstance(controlnet_weights, (float, int)):
-                            controlnet_block_weight = controlnet_weights
-                        
-                        hidden_states = hidden_states + controlnet_states_block * controlnet_block_weight
+                if (controlnet_states is not None) and (i < len(controlnet_states)):
+                    controlnet_states_block = controlnet_states[i]
+                    controlnet_block_weight = 1.0
+                    if isinstance(controlnet_weights, (list, np.ndarray)) or torch.is_tensor(controlnet_weights):
+                        controlnet_block_weight = controlnet_weights[i]
+                    elif isinstance(controlnet_weights, (float, int)):
+                        controlnet_block_weight = controlnet_weights
+                    
+                    hidden_states = hidden_states + controlnet_states_block * controlnet_block_weight
                    
            if not self.config.use_rotary_positional_embeddings:
                # CogVideoX-2B
@ -698,15 +698,16 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
                #if has_nan:
                #    raise ValueError(f"block output hidden_states has nan: {has_nan}")

-            if (controlnet_states is not None) and (i < len(controlnet_states)):
-                controlnet_states_block = controlnet_states[i]
-                controlnet_block_weight = 1.0
-                if isinstance(controlnet_weights, (list, np.ndarray)) or torch.is_tensor(controlnet_weights):
-                    controlnet_block_weight = controlnet_weights[i]
-                elif isinstance(controlnet_weights, (float, int)):
-                    controlnet_block_weight = controlnet_weights
-                
-                hidden_states = hidden_states + controlnet_states_block * controlnet_block_weight
+                #controlnet
+                if (controlnet_states is not None) and (i < len(controlnet_states)):
+                    controlnet_states_block = controlnet_states[i]
+                    controlnet_block_weight = 1.0
+                    if isinstance(controlnet_weights, (list, np.ndarray)) or torch.is_tensor(controlnet_weights):
+                        controlnet_block_weight = controlnet_weights[i]
+                        print(controlnet_block_weight)
+                    elif isinstance(controlnet_weights, (float, int)):
+                        controlnet_block_weight = controlnet_weights                    
+                    hidden_states = hidden_states + controlnet_states_block * controlnet_block_weight
                    
            if not self.config.use_rotary_positional_embeddings:
                # CogVideoX-2B
--- a/examples/cogvideo_2b_context_schedule_test_01.json
+++ b/examples/cogvideo_2b_context_schedule_test_01.json
@ -1,561 +0,0 @@
-{
-  "last_node_id": 34,
-  "last_link_id": 61,
-  "nodes": [
-    {
-      "id": 33,
-      "type": "GetImageSizeAndCount",
-      "pos": {
-        "0": 1176,
-        "1": 122
-      },
-      "size": {
-        "0": 210,
-        "1": 86
-      },
-      "flags": {},
-      "order": 7,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "image",
-          "type": "IMAGE",
-          "link": 59
-        }
-      ],
-      "outputs": [
-        {
-          "name": "image",
-          "type": "IMAGE",
-          "links": [
-            60
-          ],
-          "slot_index": 0,
-          "shape": 3
-        },
-        {
-          "name": "720 width",
-          "type": "INT",
-          "links": null,
-          "shape": 3
-        },
-        {
-          "name": "480 height",
-          "type": "INT",
-          "links": null,
-          "shape": 3
-        },
-        {
-          "name": "104 count",
-          "type": "INT",
-          "links": null,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "GetImageSizeAndCount"
-      },
-      "widgets_values": []
-    },
-    {
-      "id": 30,
-      "type": "CogVideoTextEncode",
-      "pos": {
-        "0": 500,
-        "1": 308
-      },
-      "size": [
-        474.8035864085422,
-        211.10369504535595
-      ],
-      "flags": {},
-      "order": 3,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "clip",
-          "type": "CLIP",
-          "link": 54
-        }
-      ],
-      "outputs": [
-        {
-          "name": "conditioning",
-          "type": "CONDITIONING",
-          "links": [
-            55
-          ],
-          "slot_index": 0,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CogVideoTextEncode"
-      },
-      "widgets_values": [
-        "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature\nacoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters\nthrough the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The\nbackground includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical\nperformance.",
-        1,
-        true
-      ]
-    },
-    {
-      "id": 31,
-      "type": "CogVideoTextEncode",
-      "pos": {
-        "0": 508,
-        "1": 576
-      },
-      "size": {
-        "0": 463.01251220703125,
-        "1": 124
-      },
-      "flags": {},
-      "order": 4,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "clip",
-          "type": "CLIP",
-          "link": 56
-        }
-      ],
-      "outputs": [
-        {
-          "name": "conditioning",
-          "type": "CONDITIONING",
-          "links": [
-            57
-          ],
-          "slot_index": 0,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CogVideoTextEncode"
-      },
-      "widgets_values": [
-        "",
-        1,
-        true
-      ]
-    },
-    {
-      "id": 20,
-      "type": "CLIPLoader",
-      "pos": {
-        "0": -37,
-        "1": 443
-      },
-      "size": {
-        "0": 451.30548095703125,
-        "1": 82
-      },
-      "flags": {},
-      "order": 0,
-      "mode": 0,
-      "inputs": [],
-      "outputs": [
-        {
-          "name": "CLIP",
-          "type": "CLIP",
-          "links": [
-            54,
-            56
-          ],
-          "slot_index": 0,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CLIPLoader"
-      },
-      "widgets_values": [
-        "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
-        "sd3"
-      ]
-    },
-    {
-      "id": 11,
-      "type": "CogVideoDecode",
-      "pos": {
-        "0": 1045,
-        "1": 776
-      },
-      "size": {
-        "0": 295.70111083984375,
-        "1": 198
-      },
-      "flags": {},
-      "order": 6,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "pipeline",
-          "type": "COGVIDEOPIPE",
-          "link": 37
-        },
-        {
-          "name": "samples",
-          "type": "LATENT",
-          "link": 38
-        }
-      ],
-      "outputs": [
-        {
-          "name": "images",
-          "type": "IMAGE",
-          "links": [
-            59
-          ],
-          "slot_index": 0,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CogVideoDecode"
-      },
-      "widgets_values": [
-        true,
-        96,
-        96,
-        0.083,
-        0.083,
-        true
-      ]
-    },
-    {
-      "id": 1,
-      "type": "DownloadAndLoadCogVideoModel",
-      "pos": {
-        "0": 652,
-        "1": 43
-      },
-      "size": {
-        "0": 315,
-        "1": 194
-      },
-      "flags": {},
-      "order": 1,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "pab_config",
-          "type": "PAB_CONFIG",
-          "link": null
-        },
-        {
-          "name": "block_edit",
-          "type": "TRANSFORMERBLOCKS",
-          "link": null
-        },
-        {
-          "name": "lora",
-          "type": "COGLORA",
-          "link": null
-        }
-      ],
-      "outputs": [
-        {
-          "name": "cogvideo_pipe",
-          "type": "COGVIDEOPIPE",
-          "links": [
-            36
-          ],
-          "slot_index": 0,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "DownloadAndLoadCogVideoModel"
-      },
-      "widgets_values": [
-        "THUDM/CogVideoX-2b",
-        "fp16",
-        "enabled",
-        "disabled",
-        false
-      ]
-    },
-    {
-      "id": 32,
-      "type": "VHS_VideoCombine",
-      "pos": {
-        "0": 1439,
-        "1": 122
-      },
-      "size": [
-        563.3333740234375,
-        686.2222493489583
-      ],
-      "flags": {},
-      "order": 8,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "images",
-          "type": "IMAGE",
-          "link": 60,
-          "slot_index": 0
-        },
-        {
-          "name": "audio",
-          "type": "VHS_AUDIO",
-          "link": null
-        },
-        {
-          "name": "meta_batch",
-          "type": "VHS_BatchManager",
-          "link": null
-        },
-        {
-          "name": "vae",
-          "type": "VAE",
-          "link": null
-        }
-      ],
-      "outputs": [
-        {
-          "name": "Filenames",
-          "type": "VHS_FILENAMES",
-          "links": null,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "VHS_VideoCombine"
-      },
-      "widgets_values": {
-        "frame_rate": 8,
-        "loop_count": 0,
-        "filename_prefix": "CogVideo2B_long",
-        "format": "video/h264-mp4",
-        "pix_fmt": "yuv420p",
-        "crf": 19,
-        "save_metadata": true,
-        "pingpong": false,
-        "save_output": false,
-        "videopreview": {
-          "hidden": false,
-          "paused": false,
-          "params": {
-            "filename": "CogVideo2B_long_00005.mp4",
-            "subfolder": "",
-            "type": "temp",
-            "format": "video/h264-mp4",
-            "frame_rate": 8
-          }
-        }
-      }
-    },
-    {
-      "id": 34,
-      "type": "CogVideoContextOptions",
-      "pos": {
-        "0": 1053,
-        "1": -84
-      },
-      "size": {
-        "0": 315,
-        "1": 154
-      },
-      "flags": {},
-      "order": 2,
-      "mode": 0,
-      "inputs": [],
-      "outputs": [
-        {
-          "name": "context_options",
-          "type": "COGCONTEXT",
-          "links": [
-            61
-          ],
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CogVideoContextOptions"
-      },
-      "widgets_values": [
-        "uniform_standard",
-        52,
-        4,
-        8,
-        true
-      ]
-    },
-    {
-      "id": 22,
-      "type": "CogVideoSampler",
-      "pos": {
-        "0": 1041,
-        "1": 342
-      },
-      "size": {
-        "0": 315,
-        "1": 382
-      },
-      "flags": {},
-      "order": 5,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "pipeline",
-          "type": "COGVIDEOPIPE",
-          "link": 36
-        },
-        {
-          "name": "positive",
-          "type": "CONDITIONING",
-          "link": 55,
-          "slot_index": 1
-        },
-        {
-          "name": "negative",
-          "type": "CONDITIONING",
-          "link": 57
-        },
-        {
-          "name": "samples",
-          "type": "LATENT",
-          "link": null
-        },
-        {
-          "name": "image_cond_latents",
-          "type": "LATENT",
-          "link": null
-        },
-        {
-          "name": "context_options",
-          "type": "COGCONTEXT",
-          "link": 61
-        }
-      ],
-      "outputs": [
-        {
-          "name": "cogvideo_pipe",
-          "type": "COGVIDEOPIPE",
-          "links": [
-            37
-          ],
-          "shape": 3
-        },
-        {
-          "name": "samples",
-          "type": "LATENT",
-          "links": [
-            38
-          ],
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CogVideoSampler"
-      },
-      "widgets_values": [
-        480,
-        720,
-        104,
-        32,
-        6,
-        42,
-        "fixed",
-        "CogVideoXDDIM",
-        1
-      ]
-    }
-  ],
-  "links": [
-    [
-      36,
-      1,
-      0,
-      22,
-      0,
-      "COGVIDEOPIPE"
-    ],
-    [
-      37,
-      22,
-      0,
-      11,
-      0,
-      "COGVIDEOPIPE"
-    ],
-    [
-      38,
-      22,
-      1,
-      11,
-      1,
-      "LATENT"
-    ],
-    [
-      54,
-      20,
-      0,
-      30,
-      0,
-      "CLIP"
-    ],
-    [
-      55,
-      30,
-      0,
-      22,
-      1,
-      "CONDITIONING"
-    ],
-    [
-      56,
-      20,
-      0,
-      31,
-      0,
-      "CLIP"
-    ],
-    [
-      57,
-      31,
-      0,
-      22,
-      2,
-      "CONDITIONING"
-    ],
-    [
-      59,
-      11,
-      0,
-      33,
-      0,
-      "IMAGE"
-    ],
-    [
-      60,
-      33,
-      0,
-      32,
-      0,
-      "IMAGE"
-    ],
-    [
-      61,
-      34,
-      0,
-      22,
-      5,
-      "COGCONTEXT"
-    ]
-  ],
-  "groups": [],
-  "config": {},
-  "extra": {
-    "ds": {
-      "scale": 0.8390545288825444,
-      "offset": [
-        -14.198557467892236,
-        144.90015432747748
-      ]
-    }
-  },
-  "version": 0.4
-}
--- a/examples/cogvideox_2b_controlnet_example_01.json
+++ b/examples/cogvideox_2b_controlnet_example_01.json
--- a/examples/cogvideox_I2V_example_01.json
+++ b/examples/cogvideox_I2V_example_01.json
@ -1,42 +1,7 @@
 {
-  "last_node_id": 58,
-  "last_link_id": 129,
+  "last_node_id": 63,
+  "last_link_id": 149,
  "nodes": [
-    {
-      "id": 20,
-      "type": "CLIPLoader",
-      "pos": {
-        "0": -26,
-        "1": 400
-      },
-      "size": {
-        "0": 451.30548095703125,
-        "1": 82
-      },
-      "flags": {},
-      "order": 0,
-      "mode": 0,
-      "inputs": [],
-      "outputs": [
-        {
-          "name": "CLIP",
-          "type": "CLIP",
-          "links": [
-            54,
-            56
-          ],
-          "slot_index": 0,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CLIPLoader"
-      },
-      "widgets_values": [
-        "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
-        "sd3"
-      ]
-    },
    {
      "id": 31,
      "type": "CogVideoTextEncode",
@ -46,16 +11,16 @@
      },
      "size": {
        "0": 463.01251220703125,
-        "1": 124
+        "1": 144
      },
      "flags": {},
-      "order": 4,
+      "order": 6,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
-          "link": 56
+          "link": 149
        }
      ],
      "outputs": [
@ -63,10 +28,15 @@
          "name": "conditioning",
          "type": "CONDITIONING",
          "links": [
-            123
+            146
          ],
          "slot_index": 0,
          "shape": 3
+        },
+        {
+          "name": "clip",
+          "type": "CLIP",
+          "links": null
        }
      ],
      "properties": {
@ -78,6 +48,208 @@
        true
      ]
    },
+    {
+      "id": 63,
+      "type": "CogVideoSampler",
+      "pos": {
+        "0": 1142,
+        "1": 74
+      },
+      "size": [
+        330,
+        574
+      ],
+      "flags": {},
+      "order": 7,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "model",
+          "type": "COGVIDEOMODEL",
+          "link": 144
+        },
+        {
+          "name": "positive",
+          "type": "CONDITIONING",
+          "link": 145
+        },
+        {
+          "name": "negative",
+          "type": "CONDITIONING",
+          "link": 146
+        },
+        {
+          "name": "samples",
+          "type": "LATENT",
+          "link": null,
+          "shape": 7
+        },
+        {
+          "name": "image_cond_latents",
+          "type": "LATENT",
+          "link": 147,
+          "shape": 7
+        },
+        {
+          "name": "context_options",
+          "type": "COGCONTEXT",
+          "link": null,
+          "shape": 7
+        },
+        {
+          "name": "controlnet",
+          "type": "COGVIDECONTROLNET",
+          "link": null,
+          "shape": 7
+        },
+        {
+          "name": "tora_trajectory",
+          "type": "TORAFEATURES",
+          "link": null,
+          "shape": 7
+        },
+        {
+          "name": "fastercache",
+          "type": "FASTERCACHEARGS",
+          "link": null,
+          "shape": 7
+        }
+      ],
+      "outputs": [
+        {
+          "name": "samples",
+          "type": "LATENT",
+          "links": [
+            148
+          ]
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "CogVideoSampler"
+      },
+      "widgets_values": [
+        49,
+        25,
+        6,
+        0,
+        "fixed",
+        "CogVideoXDDIM",
+        1
+      ]
+    },
+    {
+      "id": 62,
+      "type": "CogVideoImageEncode",
+      "pos": {
+        "0": 1149,
+        "1": 711
+      },
+      "size": {
+        "0": 315,
+        "1": 122
+      },
+      "flags": {},
+      "order": 5,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "vae",
+          "type": "VAE",
+          "link": 141
+        },
+        {
+          "name": "start_image",
+          "type": "IMAGE",
+          "link": 142
+        },
+        {
+          "name": "end_image",
+          "type": "IMAGE",
+          "link": null,
+          "shape": 7
+        }
+      ],
+      "outputs": [
+        {
+          "name": "samples",
+          "type": "LATENT",
+          "links": [
+            147
+          ]
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "CogVideoImageEncode"
+      },
+      "widgets_values": [
+        false,
+        0
+      ]
+    },
+    {
+      "id": 59,
+      "type": "DownloadAndLoadCogVideoModel",
+      "pos": {
+        "0": 622,
+        "1": -25
+      },
+      "size": {
+        "0": 315,
+        "1": 218
+      },
+      "flags": {},
+      "order": 0,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "block_edit",
+          "type": "TRANSFORMERBLOCKS",
+          "link": null,
+          "shape": 7
+        },
+        {
+          "name": "lora",
+          "type": "COGLORA",
+          "link": null,
+          "shape": 7
+        },
+        {
+          "name": "compile_args",
+          "type": "COMPILEARGS",
+          "link": null,
+          "shape": 7
+        }
+      ],
+      "outputs": [
+        {
+          "name": "model",
+          "type": "COGVIDEOMODEL",
+          "links": [
+            144
+          ]
+        },
+        {
+          "name": "vae",
+          "type": "VAE",
+          "links": [
+            132,
+            141
+          ],
+          "slot_index": 1
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "DownloadAndLoadCogVideoModel"
+      },
+      "widgets_values": [
+        "THUDM/CogVideoX-5b-I2V",
+        "bf16",
+        "disabled",
+        false,
+        "sdpa",
+        "main_device"
+      ]
+    },
    {
      "id": 30,
      "type": "CogVideoTextEncode",
@ -90,7 +262,7 @@
        "1": 168.08047485351562
      },
      "flags": {},
-      "order": 3,
+      "order": 4,
      "mode": 0,
      "inputs": [
        {
@ -104,10 +276,18 @@
          "name": "conditioning",
          "type": "CONDITIONING",
          "links": [
-            122
+            145
          ],
          "slot_index": 0,
          "shape": 3
+        },
+        {
+          "name": "clip",
+          "type": "CLIP",
+          "links": [
+            149
+          ],
+          "slot_index": 1
        }
      ],
      "properties": {
@ -116,22 +296,22 @@
      "widgets_values": [
        "a majestic stag is grazing in an enhanced forest, basking in the setting sun filtered by the trees",
        1,
-        true
+        false
      ]
    },
    {
      "id": 37,
      "type": "ImageResizeKJ",
      "pos": {
-        "0": 809,
-        "1": 684
+        "0": 784,
+        "1": 731
      },
      "size": {
        "0": 315,
        "1": 266
      },
      "flags": {},
-      "order": 5,
+      "order": 3,
      "mode": 0,
      "inputs": [
        {
@ -142,7 +322,8 @@
        {
          "name": "get_image_size",
          "type": "IMAGE",
-          "link": null
+          "link": null,
+          "shape": 7
        },
        {
          "name": "width_input",
@ -166,7 +347,7 @@
          "name": "IMAGE",
          "type": "IMAGE",
          "links": [
-            125
+            142
          ],
          "slot_index": 0,
          "shape": 3
@ -199,64 +380,88 @@
      ]
    },
    {
-      "id": 58,
-      "type": "CogVideoImageEncode",
+      "id": 36,
+      "type": "LoadImage",
      "pos": {
-        "0": 1156,
-        "1": 650
+        "0": 335,
+        "1": 731
      },
      "size": {
-        "0": 315,
-        "1": 122
+        "0": 402.06353759765625,
+        "1": 396.6225891113281
      },
      "flags": {},
-      "order": 6,
+      "order": 1,
      "mode": 0,
-      "inputs": [
-        {
-          "name": "pipeline",
-          "type": "COGVIDEOPIPE",
-          "link": 124
-        },
-        {
-          "name": "image",
-          "type": "IMAGE",
-          "link": 125
-        },
-        {
-          "name": "mask",
-          "type": "MASK",
-          "link": null
-        }
-      ],
+      "inputs": [],
      "outputs": [
        {
-          "name": "samples",
-          "type": "LATENT",
+          "name": "IMAGE",
+          "type": "IMAGE",
          "links": [
-            129
+            71
+          ],
+          "slot_index": 0,
+          "shape": 3
+        },
+        {
+          "name": "MASK",
+          "type": "MASK",
+          "links": null,
+          "shape": 3
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "LoadImage"
+      },
+      "widgets_values": [
+        "sd3stag.png",
+        "image"
+      ]
+    },
+    {
+      "id": 20,
+      "type": "CLIPLoader",
+      "pos": {
+        "0": -2,
+        "1": 304
+      },
+      "size": {
+        "0": 451.30548095703125,
+        "1": 82
+      },
+      "flags": {},
+      "order": 2,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "CLIP",
+          "type": "CLIP",
+          "links": [
+            54
          ],
          "slot_index": 0,
          "shape": 3
        }
      ],
      "properties": {
-        "Node name for S&R": "CogVideoImageEncode"
+        "Node name for S&R": "CLIPLoader"
      },
      "widgets_values": [
-        16,
-        true
+        "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
+        "sd3"
      ]
    },
    {
-      "id": 56,
+      "id": 60,
      "type": "CogVideoDecode",
      "pos": {
-        "0": 1581,
-        "1": 148
+        "0": 1523,
+        "1": -6
      },
      "size": {
-        "0": 300.396484375,
+        "0": 315,
        "1": 198
      },
      "flags": {},
@ -264,14 +469,14 @@
      "mode": 0,
      "inputs": [
        {
-          "name": "pipeline",
-          "type": "COGVIDEOPIPE",
-          "link": 128
+          "name": "vae",
+          "type": "VAE",
+          "link": 132
        },
        {
          "name": "samples",
          "type": "LATENT",
-          "link": 127
+          "link": 148
        }
      ],
      "outputs": [
@ -279,17 +484,15 @@
          "name": "images",
          "type": "IMAGE",
          "links": [
-            118
-          ],
-          "slot_index": 0,
-          "shape": 3
+            134
+          ]
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoDecode"
      },
      "widgets_values": [
-        false,
+        true,
        240,
        360,
        0.2,
@ -301,8 +504,8 @@
      "id": 44,
      "type": "VHS_VideoCombine",
      "pos": {
-        "0": 1927,
-        "1": 146
+        "0": 1884,
+        "1": -6
      },
      "size": [
        605.3909912109375,
@ -315,22 +518,25 @@
        {
          "name": "images",
          "type": "IMAGE",
-          "link": 118
+          "link": 134
        },
        {
          "name": "audio",
          "type": "AUDIO",
-          "link": null
+          "link": null,
+          "shape": 7
        },
        {
          "name": "meta_batch",
          "type": "VHS_BatchManager",
-          "link": null
+          "link": null,
+          "shape": 7
        },
        {
          "name": "vae",
          "type": "VAE",
-          "link": null
+          "link": null,
+          "shape": 7
        }
      ],
      "outputs": [
@ -367,180 +573,6 @@
          "muted": false
        }
      }
-    },
-    {
-      "id": 36,
-      "type": "LoadImage",
-      "pos": {
-        "0": 365,
-        "1": 685
-      },
-      "size": {
-        "0": 402.06353759765625,
-        "1": 396.6225891113281
-      },
-      "flags": {},
-      "order": 1,
-      "mode": 0,
-      "inputs": [],
-      "outputs": [
-        {
-          "name": "IMAGE",
-          "type": "IMAGE",
-          "links": [
-            71
-          ],
-          "slot_index": 0,
-          "shape": 3
-        },
-        {
-          "name": "MASK",
-          "type": "MASK",
-          "links": null,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "LoadImage"
-      },
-      "widgets_values": [
-        "sd3stag.png",
-        "image"
-      ]
-    },
-    {
-      "id": 57,
-      "type": "CogVideoSampler",
-      "pos": {
-        "0": 1138,
-        "1": 150
-      },
-      "size": [
-        399.878095897654,
-        350
-      ],
-      "flags": {},
-      "order": 7,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "pipeline",
-          "type": "COGVIDEOPIPE",
-          "link": 121
-        },
-        {
-          "name": "positive",
-          "type": "CONDITIONING",
-          "link": 122
-        },
-        {
-          "name": "negative",
-          "type": "CONDITIONING",
-          "link": 123
-        },
-        {
-          "name": "samples",
-          "type": "LATENT",
-          "link": null
-        },
-        {
-          "name": "image_cond_latents",
-          "type": "LATENT",
-          "link": 129
-        },
-        {
-          "name": "context_options",
-          "type": "COGCONTEXT",
-          "link": null
-        }
-      ],
-      "outputs": [
-        {
-          "name": "cogvideo_pipe",
-          "type": "COGVIDEOPIPE",
-          "links": [
-            128
-          ],
-          "slot_index": 0,
-          "shape": 3
-        },
-        {
-          "name": "samples",
-          "type": "LATENT",
-          "links": [
-            127
-          ],
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CogVideoSampler"
-      },
-      "widgets_values": [
-        480,
-        720,
-        49,
-        20,
-        6,
-        65334758276105,
-        "fixed",
-        "CogVideoXDPMScheduler",
-        1
-      ]
-    },
-    {
-      "id": 1,
-      "type": "DownloadAndLoadCogVideoModel",
-      "pos": {
-        "0": 633,
-        "1": 44
-      },
-      "size": {
-        "0": 337.8885192871094,
-        "1": 194
-      },
-      "flags": {},
-      "order": 2,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "pab_config",
-          "type": "PAB_CONFIG",
-          "link": null
-        },
-        {
-          "name": "block_edit",
-          "type": "TRANSFORMERBLOCKS",
-          "link": null
-        },
-        {
-          "name": "lora",
-          "type": "COGLORA",
-          "link": null
-        }
-      ],
-      "outputs": [
-        {
-          "name": "cogvideo_pipe",
-          "type": "COGVIDEOPIPE",
-          "links": [
-            121,
-            124
-          ],
-          "slot_index": 0,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "DownloadAndLoadCogVideoModel"
-      },
-      "widgets_values": [
-        "THUDM/CogVideoX-5b-I2V",
-        "bf16",
-        "disabled",
-        "disabled",
-        false
-      ]
    }
  ],
  "links": [
@ -552,14 +584,6 @@
      0,
      "CLIP"
    ],
-    [
-      56,
-      20,
-      0,
-      31,
-      0,
-      "CLIP"
-    ],
    [
      71,
      36,
@ -569,86 +593,94 @@
      "IMAGE"
    ],
    [
-      118,
-      56,
+      132,
+      59,
+      1,
+      60,
+      0,
+      "VAE"
+    ],
+    [
+      134,
+      60,
      0,
      44,
      0,
      "IMAGE"
    ],
    [
-      121,
+      141,
+      59,
      1,
+      62,
      0,
-      57,
-      0,
-      "COGVIDEOPIPE"
+      "VAE"
    ],
    [
-      122,
-      30,
-      0,
-      57,
-      1,
-      "CONDITIONING"
-    ],
-    [
-      123,
-      31,
-      0,
-      57,
-      2,
-      "CONDITIONING"
-    ],
-    [
-      124,
-      1,
-      0,
-      58,
-      0,
-      "COGVIDEOPIPE"
-    ],
-    [
-      125,
+      142,
      37,
      0,
-      58,
+      62,
      1,
      "IMAGE"
    ],
    [
-      127,
-      57,
-      1,
-      56,
-      1,
-      "LATENT"
+      144,
+      59,
+      0,
+      63,
+      0,
+      "COGVIDEOMODEL"
    ],
    [
-      128,
-      57,
+      145,
+      30,
      0,
-      56,
-      0,
-      "COGVIDEOPIPE"
+      63,
+      1,
+      "CONDITIONING"
    ],
    [
-      129,
-      58,
+      146,
+      31,
      0,
-      57,
+      63,
+      2,
+      "CONDITIONING"
+    ],
+    [
+      147,
+      62,
+      0,
+      63,
      4,
      "LATENT"
+    ],
+    [
+      148,
+      63,
+      0,
+      60,
+      1,
+      "LATENT"
+    ],
+    [
+      149,
+      30,
+      1,
+      31,
+      0,
+      "CLIP"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
-      "scale": 0.6934334949442514,
+      "scale": 0.7627768444387059,
      "offset": [
-        -24.154349208343916,
-        155.20539218330134
+        648.7113591814891,
+        185.9907078691075
      ]
    }
  },
--- a/examples/cogvideox_1_0_5b_I2V_Tora_02.json
+++ b/examples/cogvideox_1_0_5b_I2V_Tora_02.json
--- a/examples/cogvideox_1_0_5b_T2V_02.json
+++ b/examples/cogvideox_1_0_5b_T2V_02.json
@ -1,48 +1,7 @@
 {
-  "last_node_id": 34,
-  "last_link_id": 64,
+  "last_node_id": 37,
+  "last_link_id": 72,
  "nodes": [
-    {
-      "id": 31,
-      "type": "CogVideoTextEncode",
-      "pos": {
-        "0": 503,
-        "1": 521
-      },
-      "size": {
-        "0": 463.01251220703125,
-        "1": 124
-      },
-      "flags": {},
-      "order": 3,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "clip",
-          "type": "CLIP",
-          "link": 56
-        }
-      ],
-      "outputs": [
-        {
-          "name": "conditioning",
-          "type": "CONDITIONING",
-          "links": [
-            62
-          ],
-          "slot_index": 0,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CogVideoTextEncode"
-      },
-      "widgets_values": [
-        "",
-        1,
-        true
-      ]
-    },
    {
      "id": 30,
      "type": "CogVideoTextEncode",
@ -50,12 +9,12 @@
        "0": 500,
        "1": 308
      },
-      "size": {
-        "0": 471.90142822265625,
-        "1": 168.08047485351562
-      },
+      "size": [
+        470.99399664051055,
+        237.5088638951354
+      ],
      "flags": {},
-      "order": 2,
+      "order": 3,
      "mode": 0,
      "inputs": [
        {
@ -69,10 +28,18 @@
          "name": "conditioning",
          "type": "CONDITIONING",
          "links": [
-            61
+            67
          ],
          "slot_index": 0,
          "shape": 3
+        },
+        {
+          "name": "clip",
+          "type": "CLIP",
+          "links": [
+            65
+          ],
+          "slot_index": 1
        }
      ],
      "properties": {
@ -81,192 +48,79 @@
      "widgets_values": [
        "A golden retriever, sporting sleek black sunglasses, with its lengthy fur flowing in the breeze, sprints playfully across a rooftop terrace, recently refreshed by a light rain. The scene unfolds from a distance, the dog's energetic bounds growing larger as it approaches the camera, its tail wagging with unrestrained joy, while droplets of water glisten on the concrete behind it. The overcast sky provides a dramatic backdrop, emphasizing the vibrant golden coat of the canine as it dashes towards the viewer.\n\n",
        1,
-        true
+        false
      ]
    },
    {
-      "id": 33,
-      "type": "VHS_VideoCombine",
+      "id": 31,
+      "type": "CogVideoTextEncode",
      "pos": {
-        "0": 1441,
-        "1": 129
+        "0": 503,
+        "1": 602
      },
      "size": [
-        778.7022705078125,
-        310
+        464.4980515341475,
+        169.87479027400514
      ],
      "flags": {},
-      "order": 6,
+      "order": 4,
      "mode": 0,
      "inputs": [
        {
-          "name": "images",
-          "type": "IMAGE",
-          "link": 59
-        },
-        {
-          "name": "audio",
-          "type": "AUDIO",
-          "link": null
-        },
-        {
-          "name": "meta_batch",
-          "type": "VHS_BatchManager",
-          "link": null
-        },
-        {
-          "name": "vae",
-          "type": "VAE",
-          "link": null
-        }
-      ],
-      "outputs": [
-        {
-          "name": "Filenames",
-          "type": "VHS_FILENAMES",
-          "links": null,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "VHS_VideoCombine"
-      },
-      "widgets_values": {
-        "frame_rate": 8,
-        "loop_count": 0,
-        "filename_prefix": "CogVideoX5B",
-        "format": "video/h264-mp4",
-        "pix_fmt": "yuv420p",
-        "crf": 19,
-        "save_metadata": true,
-        "pingpong": false,
-        "save_output": false,
-        "videopreview": {
-          "hidden": false,
-          "paused": false,
-          "params": {
-            "filename": "CogVideoX5B_00009.mp4",
-            "subfolder": "",
-            "type": "temp",
-            "format": "video/h264-mp4",
-            "frame_rate": 8
-          },
-          "muted": false
-        }
-      }
-    },
-    {
-      "id": 20,
-      "type": "CLIPLoader",
-      "pos": {
-        "0": -26,
-        "1": 400
-      },
-      "size": {
-        "0": 451.30548095703125,
-        "1": 82
-      },
-      "flags": {},
-      "order": 0,
-      "mode": 0,
-      "inputs": [],
-      "outputs": [
-        {
-          "name": "CLIP",
+          "name": "clip",
          "type": "CLIP",
-          "links": [
-            54,
-            56
-          ],
-          "slot_index": 0,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CLIPLoader"
-      },
-      "widgets_values": [
-        "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
-        "sd3"
-      ]
-    },
-    {
-      "id": 1,
-      "type": "DownloadAndLoadCogVideoModel",
-      "pos": {
-        "0": 642,
-        "1": 90
-      },
-      "size": {
-        "0": 315,
-        "1": 194
-      },
-      "flags": {},
-      "order": 1,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "pab_config",
-          "type": "PAB_CONFIG",
-          "link": null
-        },
-        {
-          "name": "block_edit",
-          "type": "TRANSFORMERBLOCKS",
-          "link": null
-        },
-        {
-          "name": "lora",
-          "type": "COGLORA",
-          "link": null
+          "link": 65
        }
      ],
      "outputs": [
        {
-          "name": "cogvideo_pipe",
-          "type": "COGVIDEOPIPE",
+          "name": "conditioning",
+          "type": "CONDITIONING",
          "links": [
-            60
+            68
          ],
          "slot_index": 0,
          "shape": 3
+        },
+        {
+          "name": "clip",
+          "type": "CLIP",
+          "links": null
        }
      ],
      "properties": {
-        "Node name for S&R": "DownloadAndLoadCogVideoModel"
+        "Node name for S&R": "CogVideoTextEncode"
      },
      "widgets_values": [
-        "THUDM/CogVideoX-5b",
-        "bf16",
-        "disabled",
-        "disabled",
-        false
+        "",
+        1,
+        true
      ]
    },
    {
      "id": 11,
      "type": "CogVideoDecode",
      "pos": {
-        "0": 1051,
-        "1": 748
+        "0": 1416,
+        "1": 40
      },
      "size": {
        "0": 300.396484375,
        "1": 198
      },
      "flags": {},
-      "order": 5,
+      "order": 6,
      "mode": 0,
      "inputs": [
        {
-          "name": "pipeline",
-          "type": "COGVIDEOPIPE",
-          "link": 63
+          "name": "vae",
+          "type": "VAE",
+          "link": 71
        },
        {
          "name": "samples",
          "type": "LATENT",
-          "link": 64
+          "link": 69
        }
      ],
      "outputs": [
@ -293,83 +147,297 @@
      ]
    },
    {
-      "id": 34,
-      "type": "CogVideoSampler",
+      "id": 36,
+      "type": "DownloadAndLoadCogVideoModel",
      "pos": {
-        "0": 1041,
-        "1": 342
+        "0": 645,
+        "1": 17
      },
      "size": {
-        "0": 315.8404846191406,
-        "1": 358
+        "0": 315,
+        "1": 218
      },
      "flags": {},
-      "order": 4,
+      "order": 0,
      "mode": 0,
      "inputs": [
        {
-          "name": "pipeline",
-          "type": "COGVIDEOPIPE",
-          "link": 60
+          "name": "block_edit",
+          "type": "TRANSFORMERBLOCKS",
+          "link": null,
+          "shape": 7
        },
        {
-          "name": "positive",
-          "type": "CONDITIONING",
-          "link": 61
+          "name": "lora",
+          "type": "COGLORA",
+          "link": null,
+          "shape": 7
        },
        {
-          "name": "negative",
-          "type": "CONDITIONING",
-          "link": 62
-        },
-        {
-          "name": "samples",
-          "type": "LATENT",
-          "link": null
-        },
-        {
-          "name": "image_cond_latents",
-          "type": "LATENT",
-          "link": null
-        },
-        {
-          "name": "context_options",
-          "type": "COGCONTEXT",
-          "link": null
+          "name": "compile_args",
+          "type": "COMPILEARGS",
+          "link": null,
+          "shape": 7
        }
      ],
      "outputs": [
        {
-          "name": "cogvideo_pipe",
-          "type": "COGVIDEOPIPE",
+          "name": "model",
+          "type": "COGVIDEOMODEL",
          "links": [
-            63
+            70
+          ]
+        },
+        {
+          "name": "vae",
+          "type": "VAE",
+          "links": [
+            71
          ],
+          "slot_index": 1
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "DownloadAndLoadCogVideoModel"
+      },
+      "widgets_values": [
+        "THUDM/CogVideoX-5b",
+        "bf16",
+        "disabled",
+        false,
+        "sdpa",
+        "main_device"
+      ]
+    },
+    {
+      "id": 20,
+      "type": "CLIPLoader",
+      "pos": {
+        "0": 5,
+        "1": 308
+      },
+      "size": {
+        "0": 451.30548095703125,
+        "1": 82
+      },
+      "flags": {},
+      "order": 1,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "CLIP",
+          "type": "CLIP",
+          "links": [
+            54
+          ],
+          "slot_index": 0,
          "shape": 3
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "CLIPLoader"
+      },
+      "widgets_values": [
+        "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
+        "sd3"
+      ]
+    },
+    {
+      "id": 37,
+      "type": "EmptyLatentImage",
+      "pos": {
+        "0": 643,
+        "1": 827
+      },
+      "size": {
+        "0": 315,
+        "1": 106
+      },
+      "flags": {},
+      "order": 2,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "LATENT",
+          "type": "LATENT",
+          "links": [
+            72
+          ]
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "EmptyLatentImage"
+      },
+      "widgets_values": [
+        720,
+        480,
+        1
+      ]
+    },
+    {
+      "id": 35,
+      "type": "CogVideoSampler",
+      "pos": {
+        "0": 1042,
+        "1": 291
+      },
+      "size": [
+        330,
+        574
+      ],
+      "flags": {},
+      "order": 5,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "model",
+          "type": "COGVIDEOMODEL",
+          "link": 70
+        },
+        {
+          "name": "positive",
+          "type": "CONDITIONING",
+          "link": 67
+        },
+        {
+          "name": "negative",
+          "type": "CONDITIONING",
+          "link": 68
        },
+        {
+          "name": "samples",
+          "type": "LATENT",
+          "link": 72,
+          "shape": 7
+        },
+        {
+          "name": "image_cond_latents",
+          "type": "LATENT",
+          "link": null,
+          "shape": 7
+        },
+        {
+          "name": "context_options",
+          "type": "COGCONTEXT",
+          "link": null,
+          "shape": 7
+        },
+        {
+          "name": "controlnet",
+          "type": "COGVIDECONTROLNET",
+          "link": null,
+          "shape": 7
+        },
+        {
+          "name": "tora_trajectory",
+          "type": "TORAFEATURES",
+          "link": null,
+          "shape": 7
+        },
+        {
+          "name": "fastercache",
+          "type": "FASTERCACHEARGS",
+          "link": null,
+          "shape": 7
+        }
+      ],
+      "outputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "links": [
-            64
-          ],
-          "shape": 3
+            69
+          ]
        }
      ],
      "properties": {
        "Node name for S&R": "CogVideoSampler"
      },
      "widgets_values": [
-        480,
-        720,
        49,
        50,
        6,
-        806286757407563,
+        0,
        "fixed",
-        "DPM++",
+        "CogVideoXDDIM",
        1
      ]
+    },
+    {
+      "id": 33,
+      "type": "VHS_VideoCombine",
+      "pos": {
+        "0": 1767,
+        "1": 39
+      },
+      "size": [
+        778.7022705078125,
+        829.801513671875
+      ],
+      "flags": {},
+      "order": 7,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "images",
+          "type": "IMAGE",
+          "link": 59
+        },
+        {
+          "name": "audio",
+          "type": "AUDIO",
+          "link": null,
+          "shape": 7
+        },
+        {
+          "name": "meta_batch",
+          "type": "VHS_BatchManager",
+          "link": null,
+          "shape": 7
+        },
+        {
+          "name": "vae",
+          "type": "VAE",
+          "link": null,
+          "shape": 7
+        }
+      ],
+      "outputs": [
+        {
+          "name": "Filenames",
+          "type": "VHS_FILENAMES",
+          "links": null,
+          "shape": 3
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "VHS_VideoCombine"
+      },
+      "widgets_values": {
+        "frame_rate": 8,
+        "loop_count": 0,
+        "filename_prefix": "CogVideoX5B-T2V",
+        "format": "video/h264-mp4",
+        "pix_fmt": "yuv420p",
+        "crf": 19,
+        "save_metadata": true,
+        "pingpong": false,
+        "save_output": false,
+        "videopreview": {
+          "hidden": false,
+          "paused": false,
+          "params": {
+            "filename": "CogVideoX5B_00001.mp4",
+            "subfolder": "",
+            "type": "temp",
+            "format": "video/h264-mp4",
+            "frame_rate": 8
+          },
+          "muted": false
+        }
+      }
    }
  ],
  "links": [
@ -381,14 +449,6 @@
      0,
      "CLIP"
    ],
-    [
-      56,
-      20,
-      0,
-      31,
-      0,
-      "CLIP"
-    ],
    [
      59,
      11,
@ -398,43 +458,59 @@
      "IMAGE"
    ],
    [
-      60,
+      65,
+      30,
      1,
+      31,
      0,
-      34,
-      0,
-      "COGVIDEOPIPE"
+      "CLIP"
    ],
    [
-      61,
+      67,
      30,
      0,
-      34,
+      35,
      1,
      "CONDITIONING"
    ],
    [
-      62,
+      68,
      31,
      0,
-      34,
+      35,
      2,
      "CONDITIONING"
    ],
    [
-      63,
-      34,
+      69,
+      35,
      0,
      11,
-      0,
-      "COGVIDEOPIPE"
+      1,
+      "LATENT"
    ],
    [
-      64,
-      34,
+      70,
+      36,
+      0,
+      35,
+      0,
+      "COGVIDEOMODEL"
+    ],
+    [
+      71,
+      36,
      1,
      11,
-      1,
+      0,
+      "VAE"
+    ],
+    [
+      72,
+      37,
+      0,
+      35,
+      3,
      "LATENT"
    ]
  ],
@ -442,10 +518,10 @@
  "config": {},
  "extra": {
    "ds": {
-      "scale": 0.6934334949442514,
+      "scale": 0.7627768444387061,
      "offset": [
-        -24.154349208343916,
-        155.20539218330134
+        734.1791945221892,
+        237.29437844909364
      ]
    }
  },
--- a/examples/cogvideox_interpolation_example_01.json
+++ b/examples/cogvideox_interpolation_example_01.json
--- a/examples/cogvideo_5b_vid2vid_example_01.json
+++ b/examples/cogvideo_5b_vid2vid_example_01.json
--- a/examples/cogvideox_5b_Tora_I2V_testing_01.json
+++ b/examples/cogvideox_5b_Tora_I2V_testing_01.json
--- a/examples/cogvideox_5b_tora_trajectory_example_01.json
+++ b/examples/cogvideox_5b_tora_trajectory_example_01.json
--- a/examples/cogvidex_fun_i2v_example_02.json
+++ b/examples/cogvidex_fun_i2v_example_02.json
--- a/examples/cogvideox_Fun_I2V_Tora.json
+++ b/examples/cogvideox_Fun_I2V_Tora.json
--- a/examples/cogvideox_fun_pose_example_01.json
+++ b/examples/cogvideox_fun_pose_example_01.json
--- a/examples/cogvideox_fun_img2vid_tora_01.json
+++ b/examples/cogvideox_fun_img2vid_tora_01.json
--- a/examples/cogvidex_fun_5b_GGUF_10GB_VRAM_example_02.json
+++ b/examples/cogvidex_fun_5b_GGUF_10GB_VRAM_example_02.json
@ -1,622 +0,0 @@
-{
-  "last_node_id": 51,
-  "last_link_id": 114,
-  "nodes": [
-    {
-      "id": 20,
-      "type": "CLIPLoader",
-      "pos": {
-        "0": -26,
-        "1": 400
-      },
-      "size": {
-        "0": 451.30548095703125,
-        "1": 82
-      },
-      "flags": {},
-      "order": 0,
-      "mode": 0,
-      "inputs": [],
-      "outputs": [
-        {
-          "name": "CLIP",
-          "type": "CLIP",
-          "links": [
-            54
-          ],
-          "slot_index": 0,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CLIPLoader"
-      },
-      "widgets_values": [
-        "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
-        "sd3"
-      ]
-    },
-    {
-      "id": 31,
-      "type": "CogVideoTextEncode",
-      "pos": {
-        "0": 497,
-        "1": 520
-      },
-      "size": {
-        "0": 463.01251220703125,
-        "1": 144
-      },
-      "flags": {},
-      "order": 5,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "clip",
-          "type": "CLIP",
-          "link": 108
-        }
-      ],
-      "outputs": [
-        {
-          "name": "conditioning",
-          "type": "CONDITIONING",
-          "links": [
-            111
-          ],
-          "slot_index": 0,
-          "shape": 3
-        },
-        {
-          "name": "clip",
-          "type": "CLIP",
-          "links": null
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CogVideoTextEncode"
-      },
-      "widgets_values": [
-        "The video is not of a high quality, it has a low resolution. Watermark present in each frame. Strange motion trajectory. ",
-        1,
-        true
-      ]
-    },
-    {
-      "id": 44,
-      "type": "VHS_VideoCombine",
-      "pos": {
-        "0": 1842,
-        "1": 345
-      },
-      "size": [
-        855.81494140625,
-        881.2099609375
-      ],
-      "flags": {},
-      "order": 8,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "images",
-          "type": "IMAGE",
-          "link": 97
-        },
-        {
-          "name": "audio",
-          "type": "AUDIO",
-          "link": null,
-          "shape": 7
-        },
-        {
-          "name": "meta_batch",
-          "type": "VHS_BatchManager",
-          "link": null,
-          "shape": 7
-        },
-        {
-          "name": "vae",
-          "type": "VAE",
-          "link": null,
-          "shape": 7
-        }
-      ],
-      "outputs": [
-        {
-          "name": "Filenames",
-          "type": "VHS_FILENAMES",
-          "links": null,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "VHS_VideoCombine"
-      },
-      "widgets_values": {
-        "frame_rate": 16,
-        "loop_count": 0,
-        "filename_prefix": "CogVideoX_Fun",
-        "format": "video/h264-mp4",
-        "pix_fmt": "yuv420p",
-        "crf": 19,
-        "save_metadata": true,
-        "pingpong": false,
-        "save_output": false,
-        "videopreview": {
-          "hidden": false,
-          "paused": false,
-          "params": {
-            "filename": "CogVideoX_Fun_00003.mp4",
-            "subfolder": "",
-            "type": "temp",
-            "format": "video/h264-mp4",
-            "frame_rate": 16
-          },
-          "muted": false
-        }
-      }
-    },
-    {
-      "id": 36,
-      "type": "LoadImage",
-      "pos": {
-        "0": 227,
-        "1": 700
-      },
-      "size": {
-        "0": 391.3421325683594,
-        "1": 456.8497009277344
-      },
-      "flags": {},
-      "order": 1,
-      "mode": 0,
-      "inputs": [],
-      "outputs": [
-        {
-          "name": "IMAGE",
-          "type": "IMAGE",
-          "links": [
-            71
-          ],
-          "slot_index": 0,
-          "shape": 3
-        },
-        {
-          "name": "MASK",
-          "type": "MASK",
-          "links": null,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "LoadImage"
-      },
-      "widgets_values": [
-        "sd3stag.png",
-        "image"
-      ]
-    },
-    {
-      "id": 37,
-      "type": "ImageResizeKJ",
-      "pos": {
-        "0": 688,
-        "1": 708
-      },
-      "size": {
-        "0": 315,
-        "1": 266
-      },
-      "flags": {},
-      "order": 4,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "image",
-          "type": "IMAGE",
-          "link": 71
-        },
-        {
-          "name": "get_image_size",
-          "type": "IMAGE",
-          "link": null,
-          "shape": 7
-        },
-        {
-          "name": "width_input",
-          "type": "INT",
-          "link": null,
-          "widget": {
-            "name": "width_input"
-          }
-        },
-        {
-          "name": "height_input",
-          "type": "INT",
-          "link": null,
-          "widget": {
-            "name": "height_input"
-          }
-        }
-      ],
-      "outputs": [
-        {
-          "name": "IMAGE",
-          "type": "IMAGE",
-          "links": [
-            112
-          ],
-          "slot_index": 0,
-          "shape": 3
-        },
-        {
-          "name": "width",
-          "type": "INT",
-          "links": null,
-          "shape": 3
-        },
-        {
-          "name": "height",
-          "type": "INT",
-          "links": null,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "ImageResizeKJ"
-      },
-      "widgets_values": [
-        720,
-        480,
-        "lanczos",
-        true,
-        16,
-        0,
-        0,
-        "disabled"
-      ]
-    },
-    {
-      "id": 11,
-      "type": "CogVideoDecode",
-      "pos": {
-        "0": 1477,
-        "1": 344
-      },
-      "size": {
-        "0": 300.396484375,
-        "1": 198
-      },
-      "flags": {},
-      "order": 7,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "pipeline",
-          "type": "COGVIDEOPIPE",
-          "link": 113
-        },
-        {
-          "name": "samples",
-          "type": "LATENT",
-          "link": 114
-        }
-      ],
-      "outputs": [
-        {
-          "name": "images",
-          "type": "IMAGE",
-          "links": [
-            97
-          ],
-          "slot_index": 0,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CogVideoDecode"
-      },
-      "widgets_values": [
-        true,
-        240,
-        360,
-        0.2,
-        0.2,
-        true
-      ]
-    },
-    {
-      "id": 30,
-      "type": "CogVideoTextEncode",
-      "pos": {
-        "0": 493,
-        "1": 303
-      },
-      "size": {
-        "0": 471.90142822265625,
-        "1": 168.08047485351562
-      },
-      "flags": {},
-      "order": 3,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "clip",
-          "type": "CLIP",
-          "link": 54
-        }
-      ],
-      "outputs": [
-        {
-          "name": "conditioning",
-          "type": "CONDITIONING",
-          "links": [
-            110
-          ],
-          "slot_index": 0,
-          "shape": 3
-        },
-        {
-          "name": "clip",
-          "type": "CLIP",
-          "links": [
-            108
-          ],
-          "slot_index": 1
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CogVideoTextEncode"
-      },
-      "widgets_values": [
-        "majestic stag grazing in a forest and basking in the setting sun",
-        1,
-        false
-      ]
-    },
-    {
-      "id": 51,
-      "type": "CogVideoXFunSampler",
-      "pos": {
-        "0": 1058,
-        "1": 345
-      },
-      "size": {
-        "0": 367.79998779296875,
-        "1": 434
-      },
-      "flags": {},
-      "order": 6,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "pipeline",
-          "type": "COGVIDEOPIPE",
-          "link": 109
-        },
-        {
-          "name": "positive",
-          "type": "CONDITIONING",
-          "link": 110
-        },
-        {
-          "name": "negative",
-          "type": "CONDITIONING",
-          "link": 111
-        },
-        {
-          "name": "start_img",
-          "type": "IMAGE",
-          "link": 112,
-          "shape": 7
-        },
-        {
-          "name": "end_img",
-          "type": "IMAGE",
-          "link": null,
-          "shape": 7
-        },
-        {
-          "name": "context_options",
-          "type": "COGCONTEXT",
-          "link": null,
-          "shape": 7
-        },
-        {
-          "name": "tora_trajectory",
-          "type": "TORAFEATURES",
-          "link": null,
-          "shape": 7
-        },
-        {
-          "name": "fastercache",
-          "type": "FASTERCACHEARGS",
-          "link": null,
-          "shape": 7
-        },
-        {
-          "name": "vid2vid_images",
-          "type": "IMAGE",
-          "link": null,
-          "shape": 7
-        }
-      ],
-      "outputs": [
-        {
-          "name": "cogvideo_pipe",
-          "type": "COGVIDEOPIPE",
-          "links": [
-            113
-          ]
-        },
-        {
-          "name": "samples",
-          "type": "LATENT",
-          "links": [
-            114
-          ]
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "CogVideoXFunSampler"
-      },
-      "widgets_values": [
-        49,
-        720,
-        480,
-        43,
-        "randomize",
-        50,
-        6,
-        "DDIM",
-        0.0563,
-        1
-      ]
-    },
-    {
-      "id": 48,
-      "type": "DownloadAndLoadCogVideoGGUFModel",
-      "pos": {
-        "0": 585,
-        "1": 34
-      },
-      "size": {
-        "0": 378,
-        "1": 198
-      },
-      "flags": {},
-      "order": 2,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "pab_config",
-          "type": "PAB_CONFIG",
-          "link": null,
-          "shape": 7
-        },
-        {
-          "name": "block_edit",
-          "type": "TRANSFORMERBLOCKS",
-          "link": null,
-          "shape": 7
-        }
-      ],
-      "outputs": [
-        {
-          "name": "cogvideo_pipe",
-          "type": "COGVIDEOPIPE",
-          "links": [
-            109
-          ],
-          "slot_index": 0,
-          "shape": 3
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "DownloadAndLoadCogVideoGGUFModel"
-      },
-      "widgets_values": [
-        "CogVideoX_5b_fun_1_1_GGUF_Q4_0.safetensors",
-        "bf16",
-        false,
-        "offload_device",
-        false,
-        "disabled"
-      ]
-    }
-  ],
-  "links": [
-    [
-      54,
-      20,
-      0,
-      30,
-      0,
-      "CLIP"
-    ],
-    [
-      71,
-      36,
-      0,
-      37,
-      0,
-      "IMAGE"
-    ],
-    [
-      97,
-      11,
-      0,
-      44,
-      0,
-      "IMAGE"
-    ],
-    [
-      108,
-      30,
-      1,
-      31,
-      0,
-      "CLIP"
-    ],
-    [
-      109,
-      48,
-      0,
-      51,
-      0,
-      "COGVIDEOPIPE"
-    ],
-    [
-      110,
-      30,
-      0,
-      51,
-      1,
-      "CONDITIONING"
-    ],
-    [
-      111,
-      31,
-      0,
-      51,
-      2,
-      "CONDITIONING"
-    ],
-    [
-      112,
-      37,
-      0,
-      51,
-      3,
-      "IMAGE"
-    ],
-    [
-      113,
-      51,
-      0,
-      11,
-      0,
-      "COGVIDEOPIPE"
-    ],
-    [
-      114,
-      51,
-      1,
-      11,
-      1,
-      "LATENT"
-    ]
-  ],
-  "groups": [],
-  "config": {},
-  "extra": {
-    "ds": {
-      "scale": 0.7513148009015784,
-      "offset": [
-        724.7448506313632,
-        128.336592104936
-      ]
-    }
-  },
-  "version": 0.4
-}
--- a/nodes.py
+++ b/nodes.py
@ -595,14 +595,14 @@ class CogVideoSampler:
    FUNCTION = "process"
    CATEGORY = "CogVideoWrapper"

-    def process(self, pipeline, positive, negative, steps, cfg, seed, scheduler, num_frames, samples=None,
+    def process(self, model, positive, negative, steps, cfg, seed, scheduler, num_frames, samples=None,
                denoise_strength=1.0, image_cond_latents=None, context_options=None, controlnet=None, tora_trajectory=None, fastercache=None):
        mm.soft_empty_cache()

-        model_name = pipeline.get("model_name", "")
+        model_name = model.get("model_name", "")
        supports_image_conds = True if "I2V" in model_name or "interpolation" in model_name.lower() or "fun" in model_name.lower() else False

-        if "fun" in model_name.lower() and image_cond_latents is not None:
+        if "fun" in model_name.lower() and "pose" not in model_name.lower() and image_cond_latents is not None:
            assert image_cond_latents["mask"] is not None, "For fun inpaint models use CogVideoImageEncodeFunInP"
            fun_mask = image_cond_latents["mask"]
        else:
@ -632,11 +632,11 @@ class CogVideoSampler:

        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
-        pipe = pipeline["pipe"]
-        dtype = pipeline["dtype"]
-        scheduler_config = pipeline["scheduler_config"]
+        pipe = model["pipe"]
+        dtype = model["dtype"]
+        scheduler_config = model["scheduler_config"]
        
-        if not pipeline["cpu_offloading"] and pipeline["manual_offloading"]:
+        if not model["cpu_offloading"] and model["manual_offloading"]:
            pipe.transformer.to(device)
        generator = torch.Generator(device=torch.device("cpu")).manual_seed(seed)

@ -683,10 +683,10 @@ class CogVideoSampler:
        except:
            pass
  
-        autocastcondition = not pipeline["onediff"] or not dtype == torch.float32
+        autocastcondition = not model["onediff"] or not dtype == torch.float32
        autocast_context = torch.autocast(mm.get_autocast_device(device), dtype=dtype) if autocastcondition else nullcontext()
        with autocast_context:
-            latents = pipeline["pipe"](
+            latents = model["pipe"](
                num_inference_steps=steps,
                height = height,
                width = width,
@ -708,7 +708,7 @@ class CogVideoSampler:
                controlnet=controlnet,
                tora=tora_trajectory if tora_trajectory is not None else None,
            )
-        if not pipeline["cpu_offloading"] and pipeline["manual_offloading"]:
+        if not model["cpu_offloading"] and model["manual_offloading"]:
            pipe.transformer.to(offload_device)

        if fastercache is not None:
@ -763,18 +763,16 @@ class CogVideoDecode:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
-            "samples": ("LATENT", ),
-            "vae": ("VAE", {"default": None}),
-            "enable_vae_tiling": ("BOOLEAN", {"default": True, "tooltip": "Drastically reduces memory use but may introduce seams"}),
-            },
-            "optional": {
-            "tile_sample_min_height": ("INT", {"default": 240, "min": 16, "max": 2048, "step": 8, "tooltip": "Minimum tile height, default is half the height"}),
-            "tile_sample_min_width": ("INT", {"default": 360, "min": 16, "max": 2048, "step": 8, "tooltip": "Minimum tile width, default is half the width"}),
-            "tile_overlap_factor_height": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.001}),
-            "tile_overlap_factor_width": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.001}),
-            "auto_tile_size": ("BOOLEAN", {"default": True, "tooltip": "Auto size based on height and width, default is half the size"}),
-            }
-        }
+                    "vae": ("VAE",),
+                    "samples": ("LATENT",),
+                    "enable_vae_tiling": ("BOOLEAN", {"default": True, "tooltip": "Drastically reduces memory use but may introduce seams"}),
+                    "tile_sample_min_height": ("INT", {"default": 240, "min": 16, "max": 2048, "step": 8, "tooltip": "Minimum tile height, default is half the height"}),
+                    "tile_sample_min_width": ("INT", {"default": 360, "min": 16, "max": 2048, "step": 8, "tooltip": "Minimum tile width, default is half the width"}),
+                    "tile_overlap_factor_height": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.001}),
+                    "tile_overlap_factor_width": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.001}),
+                    "auto_tile_size": ("BOOLEAN", {"default": True, "tooltip": "Auto size based on height and width, default is half the size"}),
+                    },            
+                }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("images",)