From 3de01139277de7052a61849b6479bf0e2d1483a6 Mon Sep 17 00:00:00 2001 From: kijai <40791699+kijai@users.noreply.github.com> Date: Tue, 1 Oct 2024 17:09:44 +0300 Subject: [PATCH] Separate CogVideoX-Fun vid2vid and control samplers, add automatic tile size for decode --- examples/cogvideox_fun_pose_example_01.json | 1174 +++++++++---------- nodes.py | 224 ++-- 2 files changed, 731 insertions(+), 667 deletions(-) diff --git a/examples/cogvideox_fun_pose_example_01.json b/examples/cogvideox_fun_pose_example_01.json index 7fe634e..e4c827f 100644 --- a/examples/cogvideox_fun_pose_example_01.json +++ b/examples/cogvideox_fun_pose_example_01.json @@ -1,6 +1,6 @@ { - "last_node_id": 77, - "last_link_id": 159, + "last_node_id": 80, + "last_link_id": 174, "nodes": [ { "id": 31, @@ -28,7 +28,7 @@ "name": "conditioning", "type": "CONDITIONING", "links": [ - 114 + 167 ], "slot_index": 0, "shape": 3 @@ -85,225 +85,6 @@ "" ] }, - { - "id": 37, - "type": "ImageResizeKJ", - "pos": { - "0": 666, - "1": 745 - }, - "size": { - "0": 315, - "1": 266 - }, - "flags": {}, - "order": 8, - "mode": 0, - "inputs": [ - { - "name": "image", - "type": "IMAGE", - "link": 159 - }, - { - "name": "get_image_size", - "type": "IMAGE", - "link": null - }, - { - "name": "width_input", - "type": "INT", - "link": null, - "widget": { - "name": "width_input" - } - }, - { - "name": "height_input", - "type": "INT", - "link": null, - "widget": { - "name": "height_input" - } - } - ], - "outputs": [ - { - "name": "IMAGE", - "type": "IMAGE", - "links": [ - 130 - ], - "slot_index": 0, - "shape": 3 - }, - { - "name": "width", - "type": "INT", - "links": null, - "shape": 3 - }, - { - "name": "height", - "type": "INT", - "links": null, - "shape": 3 - } - ], - "properties": { - "Node name for S&R": "ImageResizeKJ" - }, - "widgets_values": [ - 512, - 512, - "lanczos", - true, - 8, - 0, - 0, - "disabled" - ] - }, - { - "id": 61, - "type": "GetImageSizeAndCount", - "pos": { - "0": 1024, - "1": 769 - }, - "size": { - "0": 277.20001220703125, - "1": 86 - }, - "flags": {}, - "order": 9, - "mode": 0, - "inputs": [ - { - "name": "image", - "type": "IMAGE", - "link": 130 - } - ], - "outputs": [ - { - "name": "image", - "type": "IMAGE", - "links": [ - 131, - 135 - ], - "slot_index": 0, - "shape": 3 - }, - { - "name": "512 width", - "type": "INT", - "links": null, - "shape": 3 - }, - { - "name": "368 height", - "type": "INT", - "links": null, - "shape": 3 - }, - { - "name": "49 count", - "type": "INT", - "links": null, - "shape": 3 - } - ], - "properties": { - "Node name for S&R": "GetImageSizeAndCount" - }, - "widgets_values": [] - }, - { - "id": 20, - "type": "CLIPLoader", - "pos": { - "0": -26, - "1": 400 - }, - "size": { - "0": 451.30548095703125, - "1": 82 - }, - "flags": {}, - "order": 0, - "mode": 0, - "inputs": [], - "outputs": [ - { - "name": "CLIP", - "type": "CLIP", - "links": [ - 54, - 56 - ], - "slot_index": 0, - "shape": 3 - } - ], - "properties": { - "Node name for S&R": "CLIPLoader" - }, - "widgets_values": [ - "t5\\clip\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors", - "sd3" - ] - }, - { - "id": 11, - "type": "CogVideoDecode", - "pos": { - "0": 1451, - "1": 363 - }, - "size": { - "0": 300.396484375, - "1": 198 - }, - "flags": {}, - "order": 11, - "mode": 0, - "inputs": [ - { - "name": "pipeline", - "type": "COGVIDEOPIPE", - "link": 115 - }, - { - "name": "samples", - "type": "LATENT", - "link": 116 - } - ], - "outputs": [ - { - "name": "images", - "type": "IMAGE", - "links": [ - 124 - ], - "slot_index": 0, - "shape": 3 - } - ], 
- "properties": { - "Node name for S&R": "CogVideoDecode" - }, - "widgets_values": [ - true, - 240, - 360, - 0.2, - 0.2, - true - ] - }, { "id": 59, "type": "AddLabel", @@ -368,12 +149,494 @@ "" ] }, + { + "id": 11, + "type": "CogVideoDecode", + "pos": { + "0": 1451, + "1": 363 + }, + "size": { + "0": 282.7455749511719, + "1": 198 + }, + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "pipeline", + "type": "COGVIDEOPIPE", + "link": 170 + }, + { + "name": "samples", + "type": "LATENT", + "link": 171 + } + ], + "outputs": [ + { + "name": "images", + "type": "IMAGE", + "links": [ + 124 + ], + "slot_index": 0, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "CogVideoDecode" + }, + "widgets_values": [ + true, + 240, + 360, + 0.2, + 0.2, + true + ] + }, + { + "id": 79, + "type": "CogVideoXFunControlSampler", + "pos": { + "0": 1085, + "1": 312 + }, + "size": { + "0": 313.41632080078125, + "1": 330 + }, + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "pipeline", + "type": "COGVIDEOPIPE", + "link": 165 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 166 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 167 + }, + { + "name": "control_video", + "type": "IMAGE", + "link": 168 + }, + { + "name": "video_length", + "type": "INT", + "link": 169, + "widget": { + "name": "video_length" + } + } + ], + "outputs": [ + { + "name": "cogvideo_pipe", + "type": "COGVIDEOPIPE", + "links": [ + 170 + ], + "shape": 3 + }, + { + "name": "samples", + "type": "LATENT", + "links": [ + 171 + ], + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "CogVideoXFunControlSampler" + }, + "widgets_values": [ + 49, + 512, + 42, + "fixed", + 25, + 6, + "DPM++", + 0.7000000000000001, + 0, + 1 + ] + }, + { + "id": 30, + "type": "CogVideoTextEncode", + "pos": { + "0": 513, + "1": 286 + }, + "size": { + "0": 471.90142822265625, + "1": 168.08047485351562 + }, + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 54 + } + ], + "outputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "links": [ + 128, + 166 + ], + "slot_index": 0, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "CogVideoTextEncode" + }, + "widgets_values": [ + "a brown bear is dancing in a forest, in front of a waterfall", + 1, + true + ] + }, + { + "id": 65, + "type": "VHS_LoadVideo", + "pos": { + "0": -191, + "1": 564 + }, + "size": [ + 390.1356201171875, + 910.0188802083334 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "meta_batch", + "type": "VHS_BatchManager", + "link": null + }, + { + "name": "vae", + "type": "VAE", + "link": null + }, + { + "name": "frame_load_cap", + "type": "INT", + "link": 152, + "widget": { + "name": "frame_load_cap" + } + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 173 + ], + "slot_index": 0, + "shape": 3 + }, + { + "name": "frame_count", + "type": "INT", + "links": null, + "shape": 3 + }, + { + "name": "audio", + "type": "AUDIO", + "links": null, + "shape": 3 + }, + { + "name": "video_info", + "type": "VHS_VIDEOINFO", + "links": [], + "slot_index": 3, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "VHS_LoadVideo" + }, + "widgets_values": { + "video": "01.mp4", + "force_rate": 0, + "force_size": "Disabled", + "custom_width": 512, + "custom_height": 512, + "frame_load_cap": 17, + "skip_first_frames": 0, + "select_every_nth": 1, + "choose video to upload": "image", + "videopreview": { 
+ "hidden": false, + "paused": false, + "params": { + "frame_load_cap": 17, + "skip_first_frames": 0, + "force_rate": 0, + "filename": "01.mp4", + "type": "input", + "format": "video/mp4", + "select_every_nth": 1 + }, + "muted": false + } + } + }, + { + "id": 20, + "type": "CLIPLoader", + "pos": { + "0": 2, + "1": 412 + }, + "size": { + "0": 451.30548095703125, + "1": 82 + }, + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 54, + 56 + ], + "slot_index": 0, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "t5\\clip\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors", + "sd3" + ] + }, + { + "id": 80, + "type": "DWPreprocessor", + "pos": { + "0": 260, + "1": 742 + }, + "size": { + "0": 364.7358703613281, + "1": 198 + }, + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 173 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 174 + ], + "slot_index": 0, + "shape": 3 + }, + { + "name": "POSE_KEYPOINT", + "type": "POSE_KEYPOINT", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "DWPreprocessor" + }, + "widgets_values": [ + "enable", + "enable", + "enable", + 512, + "yolox_l.torchscript.pt", + "dw-ll_ucoco_384_bs5.torchscript.pt" + ] + }, + { + "id": 37, + "type": "ImageResizeKJ", + "pos": { + "0": 666, + "1": 743 + }, + "size": { + "0": 315, + "1": 266 + }, + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 174 + }, + { + "name": "get_image_size", + "type": "IMAGE", + "link": null + }, + { + "name": "width_input", + "type": "INT", + "link": null, + "widget": { + "name": "width_input" + } + }, + { + "name": "height_input", + "type": "INT", + "link": null, + "widget": { + "name": "height_input" + } + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 130 + ], + "slot_index": 0, + "shape": 3 + }, + { + "name": "width", + "type": "INT", + "links": null, + "shape": 3 + }, + { + "name": "height", + "type": "INT", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "ImageResizeKJ" + }, + "widgets_values": [ + 512, + 512, + "lanczos", + true, + 16, + 0, + 0, + "disabled" + ] + }, + { + "id": 61, + "type": "GetImageSizeAndCount", + "pos": { + "0": 1018, + "1": 743 + }, + "size": { + "0": 277.20001220703125, + "1": 86 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 130 + } + ], + "outputs": [ + { + "name": "image", + "type": "IMAGE", + "links": [ + 135, + 168 + ], + "slot_index": 0, + "shape": 3 + }, + { + "name": "288 width", + "type": "INT", + "links": null, + "shape": 3 + }, + { + "name": "512 height", + "type": "INT", + "links": null, + "shape": 3 + }, + { + "name": "49 count", + "type": "INT", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "GetImageSizeAndCount" + }, + "widgets_values": [] + }, { "id": 58, "type": "ImageConcatMulti", "pos": { - "0": 1476, - "1": 714 + "0": 1439, + "1": 735 }, "size": { "0": 210, @@ -414,57 +677,63 @@ ] }, { - "id": 30, - "type": "CogVideoTextEncode", + "id": 71, + "type": "DownloadAndLoadCogVideoGGUFModel", "pos": { - "0": 513, - "1": 286 + "0": 515, + "1": 35 }, "size": { - "0": 471.90142822265625, - "1": 168.08047485351562 + "0": 466.3737487792969, + "1": 174 }, "flags": {}, - "order": 3, + "order": 1, 
"mode": 0, "inputs": [ { - "name": "clip", - "type": "CLIP", - "link": 54 + "name": "pab_config", + "type": "PAB_CONFIG", + "link": null + }, + { + "name": "block_edit", + "type": "TRANSFORMERBLOCKS", + "link": null } ], "outputs": [ { - "name": "conditioning", - "type": "CONDITIONING", + "name": "cogvideo_pipe", + "type": "COGVIDEOPIPE", "links": [ - 113, - 128 + 165 ], "slot_index": 0, "shape": 3 } ], "properties": { - "Node name for S&R": "CogVideoTextEncode" + "Node name for S&R": "DownloadAndLoadCogVideoGGUFModel" }, "widgets_values": [ - "fireball travels across a movie scene", - 1, - true + "CogVideoX_5b_fun_1_1_Pose_GGUF_Q4_0.safetensors", + "bf16", + false, + "main_device", + false ] }, { "id": 44, "type": "VHS_VideoCombine", "pos": { - "0": 1847, - "1": -22 + "0": 1842, + "1": -5 }, "size": [ - 1635.8468017578125, - 980.4377632141113 + 1186.0863037109375, + 1442.1649487639127 ], "flags": {}, "order": 14, @@ -516,7 +785,7 @@ "hidden": false, "paused": false, "params": { - "filename": "CogVideoX_Fun_00054.mp4", + "filename": "CogVideoX_Fun_Pose_00004.mp4", "subfolder": "", "type": "temp", "format": "video/h264-mp4", @@ -526,162 +795,19 @@ } } }, - { - "id": 56, - "type": "DWPreprocessor", - "pos": { - "0": 211, - "1": 746 - }, - "size": { - "0": 371.6333312988281, - "1": 222 - }, - "flags": {}, - "order": 7, - "mode": 0, - "inputs": [ - { - "name": "image", - "type": "IMAGE", - "link": 158 - } - ], - "outputs": [ - { - "name": "IMAGE", - "type": "IMAGE", - "links": [ - 159 - ], - "slot_index": 0, - "shape": 3 - }, - { - "name": "POSE_KEYPOINT", - "type": "POSE_KEYPOINT", - "links": null, - "shape": 3 - } - ], - "properties": { - "Node name for S&R": "DWPreprocessor" - }, - "widgets_values": [ - "enable", - "enable", - "enable", - 512, - "yolox_l.torchscript.pt", - "dw-ll_ucoco_384_bs5.torchscript.pt" - ] - }, - { - "id": 65, - "type": "VHS_LoadVideo", - "pos": { - "0": -510, - "1": 568 - }, - "size": [ - 642.7533569335938, - 702.6101525779661 - ], - "flags": {}, - "order": 5, - "mode": 0, - "inputs": [ - { - "name": "meta_batch", - "type": "VHS_BatchManager", - "link": null - }, - { - "name": "vae", - "type": "VAE", - "link": null - }, - { - "name": "frame_load_cap", - "type": "INT", - "link": 152, - "widget": { - "name": "frame_load_cap" - } - } - ], - "outputs": [ - { - "name": "IMAGE", - "type": "IMAGE", - "links": [ - 158 - ], - "slot_index": 0, - "shape": 3 - }, - { - "name": "frame_count", - "type": "INT", - "links": null, - "shape": 3 - }, - { - "name": "audio", - "type": "AUDIO", - "links": null, - "shape": 3 - }, - { - "name": "video_info", - "type": "VHS_VIDEOINFO", - "links": [], - "slot_index": 3, - "shape": 3 - } - ], - "properties": { - "Node name for S&R": "VHS_LoadVideo" - }, - "widgets_values": { - "video": "pose_slide.mp4", - "force_rate": 0, - "force_size": "Disabled", - "custom_width": 512, - "custom_height": 512, - "frame_load_cap": 17, - "skip_first_frames": 0, - "select_every_nth": 2, - "choose video to upload": "image", - "videopreview": { - "hidden": false, - "paused": false, - "params": { - "frame_load_cap": 17, - "skip_first_frames": 0, - "force_rate": 0, - "filename": "pose_slide.mp4", - "type": "input", - "format": "video/mp4", - "select_every_nth": 2 - }, - "muted": false - } - } - }, { "id": 72, "type": "INTConstant", "pos": { - "0": -515, - "1": 288 + "0": -265, + "1": 347 }, "size": { "0": 210, "1": 58 }, "flags": {}, - "order": 1, + "order": 2, "mode": 0, "inputs": [], "outputs": [ @@ -689,8 +815,8 @@ "name": "value", "type": "INT", "links": [ - 
151, - 152 + 152, + 169 ], "slot_index": 0, "shape": 3 @@ -705,138 +831,6 @@ ], "color": "#1b4669", "bgcolor": "#29699c" - }, - { - "id": 71, - "type": "DownloadAndLoadCogVideoGGUFModel", - "pos": { - "0": 478, - "1": -3 - }, - "size": { - "0": 466.3737487792969, - "1": 174 - }, - "flags": {}, - "order": 2, - "mode": 0, - "inputs": [ - { - "name": "pab_config", - "type": "PAB_CONFIG", - "link": null - }, - { - "name": "block_edit", - "type": "TRANSFORMERBLOCKS", - "link": null - } - ], - "outputs": [ - { - "name": "cogvideo_pipe", - "type": "COGVIDEOPIPE", - "links": [ - 148 - ], - "slot_index": 0, - "shape": 3 - } - ], - "properties": { - "Node name for S&R": "DownloadAndLoadCogVideoGGUFModel" - }, - "widgets_values": [ - "CogVideoX_5b_fun_1_1_Pose_GGUF_Q4_0.safetensors", - "bf16", - false, - "main_device", - false - ] - }, - { - "id": 54, - "type": "CogVideoXFunVid2VidSampler", - "pos": { - "0": 1067, - "1": 283 - }, - "size": { - "0": 315, - "1": 378 - }, - "flags": {}, - "order": 10, - "mode": 0, - "inputs": [ - { - "name": "pipeline", - "type": "COGVIDEOPIPE", - "link": 148 - }, - { - "name": "positive", - "type": "CONDITIONING", - "link": 113 - }, - { - "name": "negative", - "type": "CONDITIONING", - "link": 114 - }, - { - "name": "validation_video", - "type": "IMAGE", - "link": null - }, - { - "name": "control_video", - "type": "IMAGE", - "link": 131 - }, - { - "name": "video_length", - "type": "INT", - "link": 151, - "widget": { - "name": "video_length" - } - } - ], - "outputs": [ - { - "name": "cogvideo_pipe", - "type": "COGVIDEOPIPE", - "links": [ - 115 - ], - "shape": 3 - }, - { - "name": "samples", - "type": "LATENT", - "links": [ - 116 - ], - "shape": 3 - } - ], - "properties": { - "Node name for S&R": "CogVideoXFunVid2VidSampler" - }, - "widgets_values": [ - 17, - 512, - 88311810545489, - "fixed", - 25, - 6, - "DPM++", - 1, - 0.7000000000000001, - 0, - 1 - ] } ], "links": [ @@ -856,38 +850,6 @@ 0, "CLIP" ], - [ - 113, - 30, - 0, - 54, - 1, - "CONDITIONING" - ], - [ - 114, - 31, - 0, - 54, - 2, - "CONDITIONING" - ], - [ - 115, - 54, - 0, - 11, - 0, - "COGVIDEOPIPE" - ], - [ - 116, - 54, - 1, - 11, - 1, - "LATENT" - ], [ 124, 11, @@ -928,14 +890,6 @@ 0, "IMAGE" ], - [ - 131, - 61, - 0, - 54, - 4, - "IMAGE" - ], [ 135, 61, @@ -944,14 +898,6 @@ 0, "IMAGE" ], - [ - 148, - 71, - 0, - 54, - 0, - "COGVIDEOPIPE" - ], [ 150, 59, @@ -960,14 +906,6 @@ 0, "IMAGE" ], - [ - 151, - 72, - 0, - 54, - 5, - "INT" - ], [ 152, 72, @@ -977,16 +915,72 @@ "INT" ], [ - 158, + 165, + 71, + 0, + 79, + 0, + "COGVIDEOPIPE" + ], + [ + 166, + 30, + 0, + 79, + 1, + "CONDITIONING" + ], + [ + 167, + 31, + 0, + 79, + 2, + "CONDITIONING" + ], + [ + 168, + 61, + 0, + 79, + 3, + "IMAGE" + ], + [ + 169, + 72, + 0, + 79, + 4, + "INT" + ], + [ + 170, + 79, + 0, + 11, + 0, + "COGVIDEOPIPE" + ], + [ + 171, + 79, + 1, + 11, + 1, + "LATENT" + ], + [ + 173, 65, 0, - 56, + 80, 0, "IMAGE" ], [ - 159, - 56, + 174, + 80, 0, 37, 0, @@ -997,10 +991,10 @@ "config": {}, "extra": { "ds": { - "scale": 0.5730855330117133, + "scale": 0.5209868481924667, "offset": [ - 798.5395320681218, - 157.60944992071092 + 329.16752736137005, + 119.68471403460902 ] } }, diff --git a/nodes.py b/nodes.py index 41d3ec7..95811ff 100644 --- a/nodes.py +++ b/nodes.py @@ -532,30 +532,13 @@ class DownloadAndLoadCogVideoGGUFModel: vae.load_state_dict(vae_sd) pipe = CogVideoXPipeline(vae, transformer, scheduler, pab_config=pab_config) - # compilation - # if compile == "torch": - # torch._dynamo.config.suppress_errors = True - # 
pipe.transformer.to(memory_format=torch.channels_last) - # pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True) - # elif compile == "onediff": - # from onediffx import compile_pipe - # os.environ['NEXFORT_FX_FORCE_TRITON_SDPA'] = '1' - - # pipe = compile_pipe( - # pipe, - # backend="nexfort", - # options= {"mode": "max-optimize:max-autotune:max-autotune", "memory_format": "channels_last", "options": {"inductor.optimize_linear_epilogue": False, "triton.fuse_attention_allow_fp16_reduction": False}}, - # ignores=["vae"], - # fuse_qkv_projections=True, - # ) - if enable_sequential_cpu_offload: pipe.enable_sequential_cpu_offload() pipeline = { "pipe": pipe, "dtype": vae_dtype, - "base_path": "Fun" if "fun" in model else "sad", + "base_path": model, "onediff": True if compile == "onediff" else False, "cpu_offloading": enable_sequential_cpu_offload, "scheduler_config": scheduler_config @@ -833,7 +816,7 @@ class CogVideoSampler: base_path = pipeline["base_path"] - assert "Fun" not in base_path, "'Fun' models not supported in 'CogVideoSampler', use the 'CogVideoXFunSampler'" + assert "fun" not in base_path.lower(), "'Fun' models not supported in 'CogVideoSampler', use the 'CogVideoXFunSampler'" assert t_tile_length > t_tile_overlap, "t_tile_length must be greater than t_tile_overlap" assert t_tile_length <= num_frames, "t_tile_length must be equal or less than num_frames" t_tile_length = t_tile_length // 4 @@ -898,7 +881,7 @@ class CogVideoDecode: "tile_sample_min_width": ("INT", {"default": 360, "min": 16, "max": 2048, "step": 8, "tooltip": "Minimum tile width, default is half the width"}), "tile_overlap_factor_height": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.001}), "tile_overlap_factor_width": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.001}), - "enable_vae_slicing": ("BOOLEAN", {"default": True, "tooltip": "VAE will split the input tensor in slices to compute decoding in several steps. 
This is useful to save some memory and allow larger batch sizes."}), + "auto_tile_size": ("BOOLEAN", {"default": True, "tooltip": "Auto size based on height and width, default is half the size"}), } } @@ -907,24 +890,26 @@ class CogVideoDecode: FUNCTION = "decode" CATEGORY = "CogVideoWrapper" - def decode(self, pipeline, samples, enable_vae_tiling, tile_sample_min_height, tile_sample_min_width, tile_overlap_factor_height, tile_overlap_factor_width, enable_vae_slicing=True): + def decode(self, pipeline, samples, enable_vae_tiling, tile_sample_min_height, tile_sample_min_width, tile_overlap_factor_height, tile_overlap_factor_width, auto_tile_size=True): device = mm.get_torch_device() offload_device = mm.unet_offload_device() latents = samples["samples"] vae = pipeline["pipe"].vae - if enable_vae_slicing: - vae.enable_slicing() - else: - vae.disable_slicing() + + vae.enable_slicing() + if not pipeline["cpu_offloading"]: vae.to(device) if enable_vae_tiling: - vae.enable_tiling( - tile_sample_min_height=tile_sample_min_height, - tile_sample_min_width=tile_sample_min_width, - tile_overlap_factor_height=tile_overlap_factor_height, - tile_overlap_factor_width=tile_overlap_factor_width, - ) + if auto_tile_size: + vae.enable_tiling() + else: + vae.enable_tiling( + tile_sample_min_height=tile_sample_min_height, + tile_sample_min_width=tile_sample_min_width, + tile_overlap_factor_height=tile_overlap_factor_height, + tile_overlap_factor_width=tile_overlap_factor_width, + ) else: vae.disable_tiling() latents = latents.to(vae.dtype) @@ -1005,7 +990,8 @@ class CogVideoXFunSampler: pipe = pipeline["pipe"] dtype = pipeline["dtype"] base_path = pipeline["base_path"] - assert "Fun" in base_path, "'Unfun' models not supported in 'CogVideoXFunSampler', use the 'CogVideoSampler'" + assert "fun" in base_path.lower(), "'Unfun' models not supported in 'CogVideoXFunSampler', use the 'CogVideoSampler'" + assert "pose" not in base_path.lower(), "'Pose' models not supported in 'CogVideoXFunSampler', use the 'CogVideoXFunControlSampler'" if not pipeline["cpu_offloading"]: pipe.enable_model_cpu_offload(device=device) @@ -1075,19 +1061,10 @@ class CogVideoXFunVid2VidSampler: "negative": ("CONDITIONING", ), "video_length": ("INT", {"default": 49, "min": 5, "max": 49, "step": 4}), "base_resolution": ( - [ - 256, - 320, - 384, - 448, - 512, - 768, - 960, - 1024, - ], {"default": 768} + [256,320,384,448,512,768,960,1024,], {"default": 512} ), - "seed": ("INT", {"default": 43, "min": 0, "max": 0xffffffffffffffff}), - "steps": ("INT", {"default": 50, "min": 1, "max": 200, "step": 1}), + "seed": ("INT", {"default": 42, "min": 0, "max": 0xffffffffffffffff}), + "steps": ("INT", {"default": 25, "min": 1, "max": 200, "step": 1}), "cfg": ("FLOAT", {"default": 6.0, "min": 1.0, "max": 20.0, "step": 0.01}), "scheduler": ( [ @@ -1108,13 +1085,7 @@ class CogVideoXFunVid2VidSampler: } ), "denoise_strength": ("FLOAT", {"default": 0.70, "min": 0.05, "max": 1.00, "step": 0.01}), - }, - "optional":{ "validation_video": ("IMAGE",), - "control_video": ("IMAGE",), - "control_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), - "control_start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}), - "control_end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), }, } @@ -1124,14 +1095,15 @@ class CogVideoXFunVid2VidSampler: CATEGORY = "CogVideoWrapper" def process(self, pipeline, positive, negative, video_length, base_resolution, seed, steps, cfg, denoise_strength, 
scheduler,
-                validation_video=None, control_video=None, control_strength=1.0, control_start_percent=0.0, control_end_percent=1.0):
+                validation_video):
         device = mm.get_torch_device()
         offload_device = mm.unet_offload_device()
         pipe = pipeline["pipe"]
         dtype = pipeline["dtype"]
         base_path = pipeline["base_path"]
 
-        assert "Fun" in base_path, "'Unfun' models not supported in 'CogVideoXFunSampler', use the 'CogVideoSampler'"
+        assert "fun" in base_path.lower(), "'Unfun' models not supported in 'CogVideoXFunVid2VidSampler', use the 'CogVideoSampler'"
+        assert "pose" not in base_path.lower(), "'Pose' models not supported in 'CogVideoXFunVid2VidSampler', use the 'CogVideoXFunControlSampler'"
 
         if not pipeline["cpu_offloading"]:
             pipe.enable_model_cpu_offload(device=device)
@@ -1141,12 +1113,8 @@ class CogVideoXFunVid2VidSampler:
 
         # Count most suitable height and width
         aspect_ratio_sample_size = {key : [x / 512 * base_resolution for x in ASPECT_RATIO_512[key]] for key in ASPECT_RATIO_512.keys()}
 
-        if validation_video is not None:
-            validation_video = np.array(validation_video.cpu().numpy() * 255, np.uint8)
-            original_width, original_height = Image.fromarray(validation_video[0]).size
-        elif control_video is not None:
-            control_video = np.array(control_video.cpu().numpy() * 255, np.uint8)
-            original_width, original_height = Image.fromarray(control_video[0]).size
+        validation_video = np.array(validation_video.cpu().numpy() * 255, np.uint8)
+        original_width, original_height = Image.fromarray(validation_video[0]).size
 
         closest_size, closest_ratio = get_closest_ratio(original_height, original_width, ratios=aspect_ratio_sample_size)
         height, width = [int(x / 16) * 16 for x in closest_size]
@@ -1165,10 +1133,7 @@ class CogVideoXFunVid2VidSampler:
         autocast_context = torch.autocast(mm.get_autocast_device(device)) if autocastcondition else nullcontext()
         with autocast_context:
             video_length = int((video_length - 1) // pipe.vae.config.temporal_compression_ratio * pipe.vae.config.temporal_compression_ratio) + 1 if video_length != 1 else 1
-            if validation_video is not None:
-                input_video, input_video_mask, clip_image = get_video_to_video_latent(validation_video, video_length=video_length, sample_size=(height, width))
-            elif control_video is not None:
-                input_video, input_video_mask, clip_image = get_video_to_video_latent(control_video, video_length=video_length, sample_size=(height, width))
+            input_video, input_video_mask, clip_image = get_video_to_video_latent(validation_video, video_length=video_length, sample_size=(height, width))
 
             # for _lora_path, _lora_weight in zip(cogvideoxfun_model.get("loras", []), cogvideoxfun_model.get("strength_model", [])):
             #     pipeline = merge_lora(pipeline, _lora_path, _lora_weight)
@@ -1185,21 +1150,124 @@ class CogVideoXFunVid2VidSampler:
                 "comfyui_progressbar": True,
             }
 
-            if control_video is not None:
-                latents = pipe(
-                    **common_params,
-                    control_video=input_video,
-                    control_strength=control_strength,
-                    control_start_percent=control_start_percent,
-                    control_end_percent=control_end_percent
-                )
-            else:
-                latents = pipe(
-                    **common_params,
-                    video=input_video,
-                    mask_video=input_video_mask,
-                    strength=float(denoise_strength)
-                )
+            latents = pipe(
+                **common_params,
+                video=input_video,
+                mask_video=input_video_mask,
+                strength=float(denoise_strength)
+            )
+
+        # for _lora_path, _lora_weight in zip(cogvideoxfun_model.get("loras", []), cogvideoxfun_model.get("strength_model", [])):
+        #     pipeline = unmerge_lora(pipeline, _lora_path, _lora_weight)
+        return (pipeline, {"samples": latents})
+
+class 
CogVideoXFunControlSampler:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "pipeline": ("COGVIDEOPIPE",),
+                "positive": ("CONDITIONING", ),
+                "negative": ("CONDITIONING", ),
+                "video_length": ("INT", {"default": 49, "min": 5, "max": 49, "step": 4}),
+                "base_resolution": (
+                    [256,320,384,448,512,768,960,1024,], {"default": 512}
+                ),
+                "seed": ("INT", {"default": 42, "min": 0, "max": 0xffffffffffffffff}),
+                "steps": ("INT", {"default": 25, "min": 1, "max": 200, "step": 1}),
+                "cfg": ("FLOAT", {"default": 6.0, "min": 1.0, "max": 20.0, "step": 0.01}),
+                "scheduler": (
+                    [
+                        "Euler",
+                        "Euler A",
+                        "DPM++",
+                        "PNDM",
+                        "DDIM",
+                        "SASolverScheduler",
+                        "UniPCMultistepScheduler",
+                        "HeunDiscreteScheduler",
+                        "DEISMultistepScheduler",
+                        "CogVideoXDDIM",
+                        "CogVideoXDPMScheduler",
+                    ],
+                    {
+                        "default": 'DDIM'
+                    }
+                ),
+                "control_video": ("IMAGE",),
+                "control_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                "control_start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}),
+                "control_end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
+            },
+        }
+
+    RETURN_TYPES = ("COGVIDEOPIPE", "LATENT",)
+    RETURN_NAMES = ("cogvideo_pipe", "samples",)
+    FUNCTION = "process"
+    CATEGORY = "CogVideoWrapper"
+
+    def process(self, pipeline, positive, negative, video_length, base_resolution, seed, steps, cfg, scheduler,
+                control_video=None, control_strength=1.0, control_start_percent=0.0, control_end_percent=1.0):
+        device = mm.get_torch_device()
+        offload_device = mm.unet_offload_device()
+        pipe = pipeline["pipe"]
+        dtype = pipeline["dtype"]
+        base_path = pipeline["base_path"]
+
+        assert "fun" in base_path.lower(), "'Unfun' models not supported in 'CogVideoXFunControlSampler', use the 'CogVideoSampler'"
+
+        if not pipeline["cpu_offloading"]:
+            pipe.enable_model_cpu_offload(device=device)
+
+        mm.soft_empty_cache()
+
+        # Count most suitable height and width
+        aspect_ratio_sample_size = {key : [x / 512 * base_resolution for x in ASPECT_RATIO_512[key]] for key in ASPECT_RATIO_512.keys()}
+
+        control_video = np.array(control_video.cpu().numpy() * 255, np.uint8)
+        original_width, original_height = Image.fromarray(control_video[0]).size
+
+        closest_size, closest_ratio = get_closest_ratio(original_height, original_width, ratios=aspect_ratio_sample_size)
+        height, width = [int(x / 16) * 16 for x in closest_size]
+
+        # Load Sampler
+        scheduler_config = pipeline["scheduler_config"]
+        if scheduler in scheduler_mapping:
+            noise_scheduler = scheduler_mapping[scheduler].from_config(scheduler_config)
+            pipe.scheduler = noise_scheduler
+        else:
+            raise ValueError(f"Unknown scheduler: {scheduler}")
+
+        generator= torch.Generator(device).manual_seed(seed)
+
+        autocastcondition = not pipeline["onediff"]
+        autocast_context = torch.autocast(mm.get_autocast_device(device)) if autocastcondition else nullcontext()
+        with autocast_context:
+            video_length = int((video_length - 1) // pipe.vae.config.temporal_compression_ratio * pipe.vae.config.temporal_compression_ratio) + 1 if video_length != 1 else 1
+            input_video, input_video_mask, clip_image = get_video_to_video_latent(control_video, video_length=video_length, sample_size=(height, width))
+
+            # for _lora_path, _lora_weight in zip(cogvideoxfun_model.get("loras", []), cogvideoxfun_model.get("strength_model", [])):
+            #     pipeline = merge_lora(pipeline, _lora_path, _lora_weight)
+
+            common_params = {
+                "prompt_embeds": positive.to(dtype).to(device),
+                "negative_prompt_embeds": 
negative.to(dtype).to(device), + "num_frames": video_length, + "height": height, + "width": width, + "generator": generator, + "guidance_scale": cfg, + "num_inference_steps": steps, + "comfyui_progressbar": True, + } + + latents = pipe( + **common_params, + control_video=input_video, + control_strength=control_strength, + control_start_percent=control_start_percent, + control_end_percent=control_end_percent + ) # for _lora_path, _lora_weight in zip(cogvideoxfun_model.get("loras", []), cogvideoxfun_model.get("strength_model", [])): # pipeline = unmerge_lora(pipeline, _lora_path, _lora_weight) @@ -1214,6 +1282,7 @@ NODE_CLASS_MAPPINGS = { "CogVideoImageEncode": CogVideoImageEncode, "CogVideoXFunSampler": CogVideoXFunSampler, "CogVideoXFunVid2VidSampler": CogVideoXFunVid2VidSampler, + "CogVideoXFunControlSampler": CogVideoXFunControlSampler, "CogVideoTextEncodeCombine": CogVideoTextEncodeCombine, "DownloadAndLoadCogVideoGGUFModel": DownloadAndLoadCogVideoGGUFModel, "CogVideoPABConfig": CogVideoPABConfig, @@ -1228,6 +1297,7 @@ NODE_DISPLAY_NAME_MAPPINGS = { "CogVideoImageEncode": "CogVideo ImageEncode", "CogVideoXFunSampler": "CogVideoXFun Sampler", "CogVideoXFunVid2VidSampler": "CogVideoXFun Vid2Vid Sampler", + "CogVideoXFunControlSampler": "CogVideoXFun Control Sampler", "CogVideoTextEncodeCombine": "CogVideo TextEncode Combine", "DownloadAndLoadCogVideoGGUFModel": "(Down)load CogVideo GGUF Model", "CogVideoPABConfig": "CogVideo PABConfig",
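
For reference, outside the patch itself: a minimal, self-contained Python sketch of the frame-count rounding both new Fun samplers apply before sampling. The helper name snap_video_length is hypothetical; the ratio comes from pipe.vae.config.temporal_compression_ratio, which is 4 for the CogVideoX VAE, so requested lengths snap down to the form 4n + 1 (matching the 49-frame defaults in the node definitions above).

# Hypothetical standalone helper mirroring the samplers' expression:
#   video_length = int((video_length - 1) // r * r) + 1 if video_length != 1 else 1
def snap_video_length(video_length: int, temporal_compression_ratio: int = 4) -> int:
    if video_length == 1:
        return 1
    return (video_length - 1) // temporal_compression_ratio * temporal_compression_ratio + 1

assert snap_video_length(49) == 49  # already of the form 4n + 1, unchanged
assert snap_video_length(50) == 49  # snapped down to the nearest 4n + 1
assert snap_video_length(5) == 5    # the minimum length the nodes accept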