make compatible with comfy cliptextencode

kijai 2024-10-28 12:23:14 +02:00
parent ce903c0384
commit 3dce06b28b
3 changed files with 909 additions and 284 deletions

View File

@@ -0,0 +1,557 @@
{
"last_node_id": 17,
"last_link_id": 25,
"nodes": [
{
"id": 1,
"type": "MochiTextEncode",
"pos": {
"0": 483,
"1": 281
},
"size": [
381.8630768000736,
227.23898384078808
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 1
}
],
"outputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"links": [
17
],
"slot_index": 0
},
{
"name": "clip",
"type": "CLIP",
"links": [
20
],
"slot_index": 1
}
],
"properties": {
"Node name for S&R": "MochiTextEncode"
},
"widgets_values": [
"nature video of a red panda eating bamboo in front of a waterfall",
1,
false
]
},
{
"id": 2,
"type": "CLIPLoader",
"pos": {
"0": -41,
"1": 457
},
"size": [
479.5359523201174,
82
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
"type": "CLIP",
"links": [
1
]
}
],
"properties": {
"Node name for S&R": "CLIPLoader"
},
"widgets_values": [
"t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
"sd3"
]
},
{
"id": 8,
"type": "MochiTextEncode",
"pos": {
"0": 487,
"1": 563
},
"size": [
378.8630768000736,
183.64429832064002
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 20
}
],
"outputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"links": [
18,
21
],
"slot_index": 0
},
{
"name": "clip",
"type": "CLIP",
"links": null
}
],
"properties": {
"Node name for S&R": "MochiTextEncode"
},
"widgets_values": [
"",
1,
true
]
},
{
"id": 9,
"type": "VHS_VideoCombine",
"pos": {
"0": 1785,
"1": 227
},
"size": [
1261.0787353515625,
310
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 24
},
{
"name": "audio",
"type": "AUDIO",
"link": null,
"shape": 7
},
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null,
"shape": 7
},
{
"name": "vae",
"type": "VAE",
"link": null,
"shape": 7
}
],
"outputs": [
{
"name": "Filenames",
"type": "VHS_FILENAMES",
"links": null
}
],
"properties": {
"Node name for S&R": "VHS_VideoCombine"
},
"widgets_values": {
"frame_rate": 24,
"loop_count": 0,
"filename_prefix": "Mochi_preview",
"format": "video/h264-mp4",
"pix_fmt": "yuv420p",
"crf": 19,
"save_metadata": true,
"pingpong": false,
"save_output": false,
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "Mochi_preview_00021.mp4",
"subfolder": "",
"type": "temp",
"format": "video/h264-mp4",
"frame_rate": 24
},
"muted": false
}
}
},
{
"id": 4,
"type": "DownloadAndLoadMochiModel",
"pos": {
"0": 465,
"1": 20
},
"size": {
"0": 437.7432556152344,
"1": 174
},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "trigger",
"type": "CONDITIONING",
"link": 21,
"shape": 7
},
{
"name": "compile_args",
"type": "MOCHICOMPILEARGS",
"link": null,
"shape": 7
}
],
"outputs": [
{
"name": "mochi_model",
"type": "MOCHIMODEL",
"links": [
16
],
"slot_index": 0
},
{
"name": "mochi_vae",
"type": "MOCHIVAE",
"links": [
23
],
"slot_index": 1
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadMochiModel"
},
"widgets_values": [
"mochi_preview_dit_GGUF_Q8_0.safetensors",
"mochi_preview_vae_bf16.safetensors",
"fp8_e4m3fn",
"sdpa",
false
]
},
{
"id": 14,
"type": "MochiSampler",
"pos": {
"0": 960,
"1": 243
},
"size": [
315,
286
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MOCHIMODEL",
"link": 16
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 17
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 18
},
{
"name": "opt_sigmas",
"type": "SIGMAS",
"link": null,
"shape": 7
},
{
"name": "cfg_schedule",
"type": "FLOAT",
"link": null,
"widget": {
"name": "cfg_schedule"
},
"shape": 7
}
],
"outputs": [
{
"name": "samples",
"type": "LATENT",
"links": [
22,
25
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "MochiSampler"
},
"widgets_values": [
848,
480,
49,
30,
4.5,
0,
"fixed",
0
]
},
{
"id": 15,
"type": "MochiDecodeSpatialTiling",
"pos": {
"0": 1340,
"1": 226
},
"size": {
"0": 390.5999755859375,
"1": 198
},
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "vae",
"type": "MOCHIVAE",
"link": 23
},
{
"name": "samples",
"type": "LATENT",
"link": 22
}
],
"outputs": [
{
"name": "images",
"type": "IMAGE",
"links": [
24
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "MochiDecodeSpatialTiling"
},
"widgets_values": [
true,
4,
4,
16,
1,
6
]
},
{
"id": 12,
"type": "Note",
"pos": {
"0": 1349,
"1": -156
},
"size": {
"0": 365.5867919921875,
"1": 208.3488311767578
},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [],
"title": "Note: WIP!",
"properties": {},
"widgets_values": [
"VAE decoding is extremely heavy so tiling is necessary, I have not found best settings for it yet so testing help is appreciated, you can keep decoding after sampling as the latents are still in memory to see what works.\n\nYou can also save the latents to disk and decode separately.\n\nIncrease the number of tiles until it fits your VRAM, and/or reduce per_batch to split the decoding time wise, this WILL cause frame skipping!\n"
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 17,
"type": "LoadLatent",
"pos": {
"0": 1775,
"1": -51
},
"size": {
"0": 315,
"1": 58
},
"flags": {},
"order": 2,
"mode": 2,
"inputs": [],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": null
}
],
"properties": {
"Node name for S&R": "LoadLatent"
},
"widgets_values": [
"mochi_00001_.latent"
]
},
{
"id": 16,
"type": "SaveLatent",
"pos": {
"0": 1772,
"1": -168
},
"size": {
"0": 315,
"1": 58
},
"flags": {},
"order": 8,
"mode": 2,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 25
}
],
"outputs": [],
"properties": {
"Node name for S&R": "SaveLatent"
},
"widgets_values": [
"latents/mochi_latent"
]
}
],
"links": [
[
1,
2,
0,
1,
0,
"CLIP"
],
[
16,
4,
0,
14,
0,
"MOCHIMODEL"
],
[
17,
1,
0,
14,
1,
"CONDITIONING"
],
[
18,
8,
0,
14,
2,
"CONDITIONING"
],
[
20,
1,
1,
8,
0,
"CLIP"
],
[
21,
8,
0,
4,
0,
"CONDITIONING"
],
[
22,
14,
0,
15,
1,
"LATENT"
],
[
23,
4,
1,
15,
0,
"MOCHIVAE"
],
[
24,
15,
0,
9,
0,
"IMAGE"
],
[
25,
14,
0,
16,
0,
"LATENT"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.693433494944238,
"offset": [
64.81666033991527,
428.7032954894722
]
}
},
"version": 0.4
}
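The file above is a standard ComfyUI (litegraph) workflow JSON: `nodes` holds the node instances with their `widgets_values`, and each entry in `links` is a 6-tuple `[link_id, from_node, from_slot, to_node, to_slot, type]`. As a quick sanity check of the graph, here is a minimal sketch that prints every edge; the filename `mochi_example.json` is a placeholder, not part of the commit:

```python
import json

# Load a ComfyUI workflow and print its edges.
# "mochi_example.json" is a placeholder for wherever this JSON is saved.
with open("mochi_example.json") as f:
    wf = json.load(f)

node_types = {node["id"]: node["type"] for node in wf["nodes"]}

# Each link is [link_id, from_node, from_slot, to_node, to_slot, type].
for _, src, src_slot, dst, dst_slot, link_type in wf["links"]:
    print(f"[{link_type}] {node_types[src]}#{src}:{src_slot} -> {node_types[dst]}#{dst}:{dst_slot}")
```

For this workflow it prints, among others, the chain the commit depends on: link 20 routes the first MochiTextEncode's `clip` passthrough into the second (negative-prompt) encoder, and link 21 feeds that encoder's conditioning into DownloadAndLoadMochiModel's `trigger` input, which exists only to force execution order.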

View File

@@ -1,216 +1,7 @@
{
"last_node_id": 12,
"last_link_id": 15,
"last_node_id": 14,
"last_link_id": 21,
"nodes": [
{
"id": 4,
"type": "DownloadAndLoadMochiModel",
"pos": {
"0": 393,
"1": 59
},
"size": {
"0": 437.7432556152344,
"1": 126
},
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "mochi_model",
"type": "MOCHIMODEL",
"links": [
3
],
"slot_index": 0
},
{
"name": "mochi_vae",
"type": "MOCHIVAE",
"links": [
11
],
"slot_index": 1
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadMochiModel"
},
"widgets_values": [
"mochi_preview_dit_fp8_e4m3fn.safetensors",
"mochi_preview_vae_bf16.safetensors",
"fp8_e4m3fn"
]
},
{
"id": 1,
"type": "MochiTextEncode",
"pos": {
"0": 484,
"1": 258
},
"size": {
"0": 413.45361328125,
"1": 268.5947265625
},
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 1
}
],
"outputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"links": [
7
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "MochiTextEncode"
},
"widgets_values": [
"nature video of a red panda eating bamboo in front of a waterfall",
1,
true
]
},
{
"id": 8,
"type": "MochiTextEncode",
"pos": {
"0": 481,
"1": 577
},
"size": {
"0": 400,
"1": 200
},
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 8
}
],
"outputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"links": [
9
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "MochiTextEncode"
},
"widgets_values": [
"",
1,
true
]
},
{
"id": 2,
"type": "CLIPLoader",
"pos": {
"0": -3,
"1": 462
},
"size": {
"0": 429.837646484375,
"1": 82
},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
"type": "CLIP",
"links": [
1,
8
]
}
],
"properties": {
"Node name for S&R": "CLIPLoader"
},
"widgets_values": [
"t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
"sd3"
]
},
{
"id": 5,
"type": "MochiSampler",
"pos": {
"0": 960,
"1": 243
},
"size": {
"0": 315,
"1": 242
},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MOCHIMODEL",
"link": 3
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 7
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 9
}
],
"outputs": [
{
"name": "model",
"type": "LATENT",
"links": [
12
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "MochiSampler"
},
"widgets_values": [
848,
480,
163,
50,
4.5,
0,
"fixed"
]
},
{
"id": 10,
"type": "MochiDecode",
@@ -234,7 +25,7 @@
{
"name": "samples",
"type": "LATENT",
"link": 12
"link": 19
}
],
"outputs": [
@@ -291,17 +82,17 @@
"slot_index": 0
},
{
"name": "854 width",
"name": "width",
"type": "INT",
"links": null
},
{
"name": "480 height",
"name": "height",
"type": "INT",
"links": null
},
{
"name": "158 count",
"name": "count",
"type": "INT",
"links": null
}
@@ -320,7 +111,7 @@
},
"size": [
1261.0787353515625,
1019.9320011317172
310
],
"flags": {},
"order": 8,
@@ -391,12 +182,12 @@
"0": 1271,
"1": -119
},
"size": [
365.586792085973,
208.34883369101206
],
"size": {
"0": 365.5867919921875,
"1": 208.3488311767578
},
"flags": {},
"order": 2,
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [],
@@ -407,6 +198,258 @@
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 14,
"type": "MochiSampler",
"pos": {
"0": 960,
"1": 243
},
"size": [
315,
286
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MOCHIMODEL",
"link": 16
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 17
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 18
},
{
"name": "opt_sigmas",
"type": "SIGMAS",
"link": null,
"shape": 7
},
{
"name": "cfg_schedule",
"type": "FLOAT",
"link": null,
"widget": {
"name": "cfg_schedule"
},
"shape": 7
}
],
"outputs": [
{
"name": "samples",
"type": "LATENT",
"links": [
19
]
}
],
"properties": {
"Node name for S&R": "MochiSampler"
},
"widgets_values": [
848,
480,
163,
50,
4.5,
0,
"fixed",
0
]
},
{
"id": 4,
"type": "DownloadAndLoadMochiModel",
"pos": {
"0": 452,
"1": -20
},
"size": {
"0": 437.7432556152344,
"1": 174
},
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "trigger",
"type": "CONDITIONING",
"link": 21,
"shape": 7
},
{
"name": "compile_args",
"type": "MOCHICOMPILEARGS",
"link": null,
"shape": 7
}
],
"outputs": [
{
"name": "mochi_model",
"type": "MOCHIMODEL",
"links": [
16
],
"slot_index": 0
},
{
"name": "mochi_vae",
"type": "MOCHIVAE",
"links": [
11
],
"slot_index": 1
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadMochiModel"
},
"widgets_values": [
"mochi_preview_dit_GGUF_Q8_0.safetensors",
"mochi_preview_vae_bf16.safetensors",
"fp8_e4m3fn",
"sdpa",
false
]
},
{
"id": 1,
"type": "MochiTextEncode",
"pos": {
"0": 483,
"1": 281
},
"size": [
381.8630768000736,
227.23898384078808
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 1
}
],
"outputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"links": [
17
],
"slot_index": 0
},
{
"name": "clip",
"type": "CLIP",
"links": [
20
],
"slot_index": 1
}
],
"properties": {
"Node name for S&R": "MochiTextEncode"
},
"widgets_values": [
"nature video of a red panda eating bamboo in front of a waterfall",
1,
false
]
},
{
"id": 2,
"type": "CLIPLoader",
"pos": {
"0": -41,
"1": 457
},
"size": [
479.5359523201174,
82
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
"type": "CLIP",
"links": [
1
]
}
],
"properties": {
"Node name for S&R": "CLIPLoader"
},
"widgets_values": [
"t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
"sd3"
]
},
{
"id": 8,
"type": "MochiTextEncode",
"pos": {
"0": 487,
"1": 563
},
"size": [
378.8630768000736,
183.64429832064002
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 20
}
],
"outputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"links": [
18,
21
],
"slot_index": 0
},
{
"name": "clip",
"type": "CLIP",
"links": null
}
],
"properties": {
"Node name for S&R": "MochiTextEncode"
},
"widgets_values": [
"",
1,
true
]
}
],
"links": [
@@ -418,38 +461,6 @@
0,
"CLIP"
],
[
3,
4,
0,
5,
0,
"MOCHIMODEL"
],
[
7,
1,
0,
5,
1,
"CONDITIONING"
],
[
8,
2,
0,
8,
0,
"CLIP"
],
[
9,
8,
0,
5,
2,
"CONDITIONING"
],
[
11,
4,
@@ -458,14 +469,6 @@
0,
"MOCHIVAE"
],
[
12,
5,
0,
10,
1,
"LATENT"
],
[
14,
10,
@@ -481,16 +484,64 @@
9,
0,
"IMAGE"
],
[
16,
4,
0,
14,
0,
"MOCHIMODEL"
],
[
17,
1,
0,
14,
1,
"CONDITIONING"
],
[
18,
8,
0,
14,
2,
"CONDITIONING"
],
[
19,
14,
0,
10,
1,
"LATENT"
],
[
20,
1,
1,
8,
0,
"CLIP"
],
[
21,
8,
0,
4,
0,
"CONDITIONING"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.6934334949442466,
"scale": 0.8390545288825276,
"offset": [
-193.29818918510955,
307.42265737796134
74.08380372279714,
307.44392783781285
]
}
},

View File

@@ -65,14 +65,14 @@ class DownloadAndLoadMochiModel:
{"tooltip": "Downloads from 'https://huggingface.co/Kijai/Mochi_preview_comfy' to 'models/vae/mochi'", },
),
"precision": (["fp8_e4m3fn","fp8_e4m3fn_fast","fp16", "fp32", "bf16"],
{"default": "fp8_e4m3fn", }),
{"default": "fp8_e4m3fn", "tooltip": "The precision to use for the model weights. Has no effect with GGUF models"},),
"attention_mode": (["sdpa","flash_attn","sage_attn", "comfy"],
),
},
"optional": {
"trigger": ("CONDITIONING", {"tooltip": "Dummy input for forcing execution order",}),
"compile_args": ("MOCHICOMPILEARGS", {"tooltip": "Optional torch.compile arguments",}),
"cublas_ops": ("BOOLEAN", {"tooltip": "tested on 4090, unsure of gpu requirements, enables faster linear ops from'https://github.com/aredden/torch-cublas-hgemm'",}),
"cublas_ops": ("BOOLEAN", {"tooltip": "tested on 4090, unsure of gpu requirements, enables faster linear ops for the GGUF models, for more info:'https://github.com/aredden/torch-cublas-hgemm'",}),
},
}
@@ -169,7 +169,7 @@ class MochiModelLoader:
"optional": {
"trigger": ("CONDITIONING", {"tooltip": "Dummy input for forcing execution order",}),
"compile_args": ("MOCHICOMPILEARGS", {"tooltip": "Optional torch.compile arguments",}),
"cublas_ops": ("BOOLEAN", {"tooltip": "tested on 4090, unsure of gpu requirements, enables faster linear ops from'https://github.com/aredden/torch-cublas-hgemm'",}),
"cublas_ops": ("BOOLEAN", {"tooltip": "tested on 4090, unsure of gpu requirements, enables faster linear ops for the GGUF models, for more info:'https://github.com/aredden/torch-cublas-hgemm'",}),
},
}
@@ -315,18 +315,23 @@ class MochiTextEncode:
load_device = mm.text_encoder_device()
offload_device = mm.text_encoder_offload_device()
clip.tokenizer.t5xxl.pad_to_max_length = True
clip.tokenizer.t5xxl.max_length = max_tokens
clip.cond_stage_model.t5xxl.return_attention_masks = True
clip.cond_stage_model.t5xxl.enable_attention_masks = True
clip.cond_stage_model.t5_attention_mask = True
clip.cond_stage_model.to(load_device)
tokens = clip.tokenizer.t5xxl.tokenize_with_weights(prompt, return_word_ids=True)
try:
embeds, _, attention_mask = clip.cond_stage_model.t5xxl.encode_token_weights(tokens)
clip.tokenizer.t5xxl.pad_to_max_length = True
clip.tokenizer.t5xxl.max_length = max_tokens
clip.cond_stage_model.t5xxl.return_attention_masks = True
clip.cond_stage_model.t5xxl.enable_attention_masks = True
clip.cond_stage_model.t5_attention_mask = True
clip.cond_stage_model.to(load_device)
tokens = clip.tokenizer.t5xxl.tokenize_with_weights(prompt, return_word_ids=True)
try:
embeds, _, attention_mask = clip.cond_stage_model.t5xxl.encode_token_weights(tokens)
except:
NotImplementedError("Failed to get attention mask from T5, is your ComfyUI up to date?")
except:
NotImplementedError("Failed to get attention mask from T5, is your ComfyUI up to date?")
clip.cond_stage_model.to(load_device)
tokens = clip.tokenizer.tokenize_with_weights(prompt, return_word_ids=True)
embeds, _, attention_mask = clip.cond_stage_model.encode_token_weights(tokens)
if embeds.shape[1] > 256:
raise ValueError(f"Prompt is too long, max tokens supported is {max_tokens} or less, got {embeds.shape[1]}")
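The net effect of this hunk is to wrap the kijai-specific T5 setup in an outer `try`, so a CLIP object that lacks the `t5xxl` attributes (i.e. one meant for the stock ComfyUI text-encode path) falls back to the generic `encode_token_weights` API. A de-interleaved sketch of the resulting control flow, simplified rather than a drop-in copy of the method:

```python
def encode_prompt(clip, prompt, max_tokens, load_device):
    # Sketch reconstructed from the hunk above; error handling simplified.
    try:
        # Kijai path: force T5-XXL padding and attention-mask return.
        clip.tokenizer.t5xxl.pad_to_max_length = True
        clip.tokenizer.t5xxl.max_length = max_tokens
        clip.cond_stage_model.t5xxl.return_attention_masks = True
        clip.cond_stage_model.t5xxl.enable_attention_masks = True
        clip.cond_stage_model.t5_attention_mask = True
        clip.cond_stage_model.to(load_device)
        tokens = clip.tokenizer.t5xxl.tokenize_with_weights(prompt, return_word_ids=True)
        embeds, _, attention_mask = clip.cond_stage_model.t5xxl.encode_token_weights(tokens)
    except Exception:
        # Fallback: a plain ComfyUI CLIP object without t5xxl attributes.
        clip.cond_stage_model.to(load_device)
        tokens = clip.tokenizer.tokenize_with_weights(prompt, return_word_ids=True)
        embeds, _, attention_mask = clip.cond_stage_model.encode_token_weights(tokens)
    return embeds, attention_mask
```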
@@ -358,8 +363,8 @@ class MochiSampler:
#"batch_cfg": ("BOOLEAN", {"default": False, "tooltip": "Enable batched cfg"}),
},
"optional": {
"cfg_schedule": ("FLOAT", {"forceInput": True,}),
"opt_sigmas": ("SIGMAS",),
"cfg_schedule": ("FLOAT", {"forceInput": True, "tooltip": "Override cfg schedule with a list of ints"}),
"opt_sigmas": ("SIGMAS", {"tooltip": "Override sigma schedule and steps"}),
}
}
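Both optional inputs override per-step behaviour: `opt_sigmas` replaces the built-in `linear_quadratic_schedule` and fixes the step count to its own length (the node appends the trailing 0.0 itself, as the next hunk shows), while `cfg_schedule` supplies one guidance value per step. A hedged example of values a caller might wire into these inputs; the ramp itself is made up for illustration:

```python
import torch

steps = 30

# Hypothetical cfg schedule: one float per step, stronger guidance early on.
cfg_schedule = [4.5] * 10 + [3.0] * (steps - 10)

# Hypothetical sigma override: do not include the trailing 0.0;
# the sampler appends it and derives `steps` from len(opt_sigmas).
opt_sigmas = torch.linspace(1.0, 0.05, steps)
```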
@@ -373,16 +378,28 @@ class MochiSampler:
if opt_sigmas is not None:
sigma_schedule = opt_sigmas.tolist()
steps = len(sigma_schedule)
steps = int(len(sigma_schedule))
sigma_schedule.extend([0.0])
logging.info(f"Using sigma_schedule: {sigma_schedule}")
else:
sigma_schedule = linear_quadratic_schedule(steps, 0.025)
logging.info(f"Using sigma_schedule: {sigma_schedule}")
cfg_schedule = cfg_schedule or [cfg] * steps
logging.info(f"Using cfg schedule: {cfg_schedule}")
if cfg_schedule is None:
cfg_schedule = [cfg] * steps
else:
logging.info(f"Using cfg schedule: {cfg_schedule}")
# For compatibility with Comfy CLIPTextEncode
if not isinstance(positive, dict):
positive = {
"embeds": positive[0][0],
"attention_mask": positive[0][1]["attention_mask"].bool(),
}
if not isinstance(negative, dict):
negative = {
"embeds": negative[0][0],
"attention_mask": negative[0][1]["attention_mask"].bool(),
}
args = {
"height": height,