From feeff366b546f270813dea7a77c7043c7dc22e1d Mon Sep 17 00:00:00 2001 From: kijai <40791699+kijai@users.noreply.github.com> Date: Tue, 19 Nov 2024 19:06:15 +0200 Subject: [PATCH] update --- examples/cogvideox_Fun_180_orbit_01.json | 1922 ++++++++++++++++++++++ model_loading.py | 62 +- readme.md | 27 + 3 files changed, 1980 insertions(+), 31 deletions(-) create mode 100644 examples/cogvideox_Fun_180_orbit_01.json diff --git a/examples/cogvideox_Fun_180_orbit_01.json b/examples/cogvideox_Fun_180_orbit_01.json new file mode 100644 index 0000000..2d482e8 --- /dev/null +++ b/examples/cogvideox_Fun_180_orbit_01.json @@ -0,0 +1,1922 @@ +{ + "last_node_id": 73, + "last_link_id": 165, + "nodes": [ + { + "id": 20, + "type": "CLIPLoader", + "pos": { + "0": -27, + "1": 42 + }, + "size": { + "0": 451.30548095703125, + "1": 82 + }, + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 54 + ], + "slot_index": 0, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors", + "sd3" + ] + }, + { + "id": 52, + "type": "CogVideoLoraSelect", + "pos": { + "0": -3, + "1": -383 + }, + "size": [ + 438.44762263180314, + 106 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "name": "prev_lora", + "type": "COGLORA", + "link": null, + "shape": 7 + } + ], + "outputs": [ + { + "name": "lora", + "type": "COGLORA", + "links": [ + 124 + ] + } + ], + "properties": { + "Node name for S&R": "CogVideoLoraSelect" + }, + "widgets_values": [ + "DimensionX_orbit_left_lora_rank256_bf16.safetensors", + 1, + true + ] + }, + { + "id": 55, + "type": "ImageFlip+", + "pos": { + "0": 1247, + "1": 770 + }, + "size": { + "0": 315, + "1": 58 + }, + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 130 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 131, + 151 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ImageFlip+" + }, + "widgets_values": [ + "x" + ] + }, + { + "id": 54, + "type": "ImageFlip+", + "pos": { + "0": 847, + "1": 802 + }, + "size": { + "0": 315, + "1": 58 + }, + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 128 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 129 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ImageFlip+" + }, + "widgets_values": [ + "x" + ] + }, + { + "id": 50, + "type": "CogVideoImageEncodeFunInP", + "pos": { + "0": 865, + "1": 567 + }, + "size": [ + 253.60000610351562, + 146 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 119 + }, + { + "name": "start_image", + "type": "IMAGE", + "link": 129 + }, + { + "name": "end_image", + "type": "IMAGE", + "link": null, + "shape": 7 + }, + { + "name": "num_frames", + "type": "INT", + "link": 126, + "widget": { + "name": "num_frames" + } + } + ], + "outputs": [ + { + "name": "image_cond_latents", + "type": "LATENT", + "links": [ + 120 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CogVideoImageEncodeFunInP" + }, + "widgets_values": [ + 33, + true, + 0.03 + ] + }, + { + "id": 63, + "type": "CogVideoImageEncodeFunInP", + "pos": { + "0": 936.3893432617188, + "1": 1048.5242919921875 + }, + "size": { + "0": 253.60000610351562, + "1": 146 + }, + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 144 + }, + { + "name": "start_image", + "type": "IMAGE", + "link": 146 + }, + { + "name": "end_image", + "type": "IMAGE", + "link": null, + "shape": 7 + }, + { + "name": "num_frames", + "type": "INT", + "link": 145, + "widget": { + "name": "num_frames" + } + } + ], + "outputs": [ + { + "name": "image_cond_latents", + "type": "LATENT", + "links": [ + 147 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CogVideoImageEncodeFunInP" + }, + "widgets_values": [ + 33, + true, + 0.03 + ] + }, + { + "id": 51, + "type": "CogVideoDecode", + "pos": { + "0": 1219, + "1": -134 + }, + "size": { + "0": 315, + "1": 198 + }, + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 122 + }, + { + "name": "samples", + "type": "LATENT", + "link": 123 + } + ], + "outputs": [ + { + "name": "images", + "type": "IMAGE", + "links": [ + 130 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CogVideoDecode" + }, + "widgets_values": [ + true, + 240, + 360, + 0.2, + 0.2, + true + ] + }, + { + "id": 53, + "type": "PrimitiveNode", + "pos": { + "0": 117, + "1": 399 + }, + "size": [ + 261.57286031534534, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "INT", + "type": "INT", + "links": [ + 126, + 127, + 143, + 145 + ], + "widget": { + "name": "num_frames" + }, + "slot_index": 0 + } + ], + "title": "num_frames", + "properties": { + "Run widget replace on values": false + }, + "widgets_values": [ + 33, + "fixed" + ] + }, + { + "id": 48, + "type": "CogVideoSampler", + "pos": { + "0": 1200, + "1": 124 + }, + "size": [ + 330, + 574 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "COGVIDEOMODEL", + "link": 114 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 116 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 117 + }, + { + "name": "samples", + "type": "LATENT", + "link": null, + "shape": 7 + }, + { + "name": "image_cond_latents", + "type": "LATENT", + "link": 120, + "shape": 7 + }, + { + "name": "context_options", + "type": "COGCONTEXT", + "link": null, + "shape": 7 + }, + { + "name": "controlnet", + "type": "COGVIDECONTROLNET", + "link": null, + "shape": 7 + }, + { + "name": "tora_trajectory", + "type": "TORAFEATURES", + "link": null, + "shape": 7 + }, + { + "name": "fastercache", + "type": "FASTERCACHEARGS", + "link": null, + "shape": 7 + }, + { + "name": "num_frames", + "type": "INT", + "link": 127, + "widget": { + "name": "num_frames" + } + }, + { + "name": "seed", + "type": "INT", + "link": 156, + "widget": { + "name": "seed" + } + } + ], + "outputs": [ + { + "name": "samples", + "type": "LATENT", + "links": [ + 123 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CogVideoSampler" + }, + "widgets_values": [ + 33, + 40, + 6, + 458091243358278, + "fixed", + "CogVideoXDDIM", + 1 + ] + }, + { + "id": 68, + "type": "PrimitiveNode", + "pos": { + "0": 514, + "1": 985 + }, + "size": [ + 295.90419649751334, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "INT", + "type": "INT", + "links": [ + 156, + 157 + ], + "widget": { + "name": "seed" + }, + "slot_index": 0 + } + ], + "title": "seed", + "properties": { + "Run widget replace on values": false + }, + "widgets_values": [ + 458091243358278, + "fixed" + ] + }, + { + "id": 69, + "type": "DownloadAndLoadFlorence2Model", + "pos": { + "0": -1305, + "1": -13 + }, + "size": [ + 442.37554309913344, + 106 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "lora", + "type": "PEFTLORA", + "link": null, + "shape": 7 + } + ], + "outputs": [ + { + "name": "florence2_model", + "type": "FL2MODEL", + "links": [ + 158 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "DownloadAndLoadFlorence2Model" + }, + "widgets_values": [ + "MiaoshouAI/Florence-2-base-PromptGen-v2.0", + "fp16", + "sdpa" + ] + }, + { + "id": 37, + "type": "ImageResizeKJ", + "pos": { + "0": -202, + "1": 588 + }, + "size": { + "0": 315, + "1": 266 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 71 + }, + { + "name": "get_image_size", + "type": "IMAGE", + "link": null, + "shape": 7 + }, + { + "name": "width_input", + "type": "INT", + "link": null, + "widget": { + "name": "width_input" + } + }, + { + "name": "height_input", + "type": "INT", + "link": null, + "widget": { + "name": "height_input" + } + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 128, + 146, + 159 + ], + "slot_index": 0, + "shape": 3 + }, + { + "name": "width", + "type": "INT", + "links": null, + "shape": 3 + }, + { + "name": "height", + "type": "INT", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "ImageResizeKJ" + }, + "widgets_values": [ + 768, + 768, + "lanczos", + true, + 2, + 0, + 0, + "disabled" + ] + }, + { + "id": 71, + "type": "StringConstantMultiline", + "pos": { + "0": -709, + "1": 20 + }, + "size": { + "0": 400, + "1": 200 + }, + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "links": [ + 160 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "StringConstantMultiline" + }, + "widgets_values": [ + "camera orbit", + false + ] + }, + { + "id": 72, + "type": "JoinStrings", + "pos": { + "0": -232, + "1": 231 + }, + "size": [ + 315, + 106 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "string1", + "type": "STRING", + "link": 160, + "widget": { + "name": "string1" + } + }, + { + "name": "string2", + "type": "STRING", + "link": 162, + "widget": { + "name": "string2" + } + } + ], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "links": [ + 163 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "JoinStrings" + }, + "widgets_values": [ + "", + "", + ", " + ] + }, + { + "id": 70, + "type": "Florence2Run", + "pos": { + "0": -1276, + "1": 170 + }, + "size": { + "0": 400, + "1": 352 + }, + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 159 + }, + { + "name": "florence2_model", + "type": "FL2MODEL", + "link": 158 + } + ], + "outputs": [ + { + "name": "image", + "type": "IMAGE", + "links": null, + "slot_index": 0 + }, + { + "name": "mask", + "type": "MASK", + "links": null + }, + { + "name": "caption", + "type": "STRING", + "links": [ + 161, + 162 + ], + "slot_index": 2 + }, + { + "name": "data", + "type": "JSON", + "links": null + } + ], + "properties": { + "Node name for S&R": "Florence2Run" + }, + "widgets_values": [ + "", + "more_detailed_caption", + true, + false, + 226, + 3, + true, + "", + 586007018516875, + "fixed" + ] + }, + { + "id": 73, + "type": "ShowText|pysssss", + "pos": { + "0": -793, + "1": 321 + }, + "size": [ + 502.3168660879171, + 180.55015376950485 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "text", + "type": "STRING", + "link": 161, + "widget": { + "name": "text" + } + } + ], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "links": null, + "shape": 6 + } + ], + "properties": { + "Node name for S&R": "ShowText|pysssss" + }, + "widgets_values": [ + "", + "A digital illustration shoot from a frontal camera angle about a dark knight in shining armor stands in a dimly lit forest, with a glowing fire in the background. the image also shows a mysterious and intense atmosphere. on the middle of the image, a male knight appears to be standing, facing the viewer, with his full body visible. he is wearing a full plate armor with a red cloth draped over his shoulders. the armor is shiny and detailed, with intricate designs and a chain attached to it. he has two curved horns on his head, and his eyes are glowing yellow. the background is dark and smoky, with tall trees and a warm, glowing fire." + ] + }, + { + "id": 59, + "type": "GIMMVFI_interpolate", + "pos": { + "0": 2880, + "1": -200 + }, + "size": { + "0": 330, + "1": 150 + }, + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "name": "gimmvfi_model", + "type": "GIMMVIF_MODEL", + "link": 134 + }, + { + "name": "images", + "type": "IMAGE", + "link": 165 + } + ], + "outputs": [ + { + "name": "images", + "type": "IMAGE", + "links": [ + 164 + ], + "slot_index": 0 + }, + { + "name": "flow_tensors", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GIMMVFI_interpolate" + }, + "widgets_values": [ + 1, + 2, + 223874235763998, + "fixed" + ] + }, + { + "id": 67, + "type": "ImageBatchMulti", + "pos": { + "0": 2900, + "1": 20 + }, + "size": { + "0": 210, + "1": 102 + }, + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "name": "image_1", + "type": "IMAGE", + "link": 152 + }, + { + "name": "image_2", + "type": "IMAGE", + "link": 153 + } + ], + "outputs": [ + { + "name": "images", + "type": "IMAGE", + "links": [ + 165 + ], + "slot_index": 0 + } + ], + "properties": {}, + "widgets_values": [ + 2, + null + ] + }, + { + "id": 66, + "type": "ReverseImageBatch", + "pos": { + "0": 2590, + "1": -20 + }, + "size": { + "0": 239.40000915527344, + "1": 26 + }, + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 151 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 152 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ReverseImageBatch" + } + }, + { + "id": 58, + "type": "DownloadAndLoadGIMMVFIModel", + "pos": { + "0": 2510, + "1": -210 + }, + "size": { + "0": 315, + "1": 58 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "gimmvfi_model", + "type": "GIMMVIF_MODEL", + "links": [ + 134 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "DownloadAndLoadGIMMVFIModel" + }, + "widgets_values": [ + "gimmvfi_r_arb_lpips_fp32.safetensors" + ] + }, + { + "id": 36, + "type": "LoadImage", + "pos": { + "0": -808, + "1": 573 + }, + "size": [ + 556.7343073028583, + 502.50569947324857 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 71 + ], + "slot_index": 0, + "shape": 3 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "ComfyUI_temp_lhgah_00059_.png", + "image" + ] + }, + { + "id": 60, + "type": "VHS_VideoCombine", + "pos": { + "0": 2520, + "1": 180 + }, + "size": [ + 860.5738525390625, + 1444.76513671875 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 164 + }, + { + "name": "audio", + "type": "AUDIO", + "link": null, + "shape": 7 + }, + { + "name": "meta_batch", + "type": "VHS_BatchManager", + "link": null, + "shape": 7 + }, + { + "name": "vae", + "type": "VAE", + "link": null, + "shape": 7 + } + ], + "outputs": [ + { + "name": "Filenames", + "type": "VHS_FILENAMES", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "VHS_VideoCombine" + }, + "widgets_values": { + "frame_rate": 16, + "loop_count": 0, + "filename_prefix": "CogVideoX_Fun_orbits", + "format": "video/h264-mp4", + "pix_fmt": "yuv420p", + "crf": 19, + "save_metadata": true, + "pingpong": false, + "save_output": false, + "videopreview": { + "hidden": false, + "paused": false, + "params": { + "filename": "CogVideoX_Fun_orbits_00003.mp4", + "subfolder": "", + "type": "temp", + "format": "video/h264-mp4", + "frame_rate": 16 + }, + "muted": false + } + } + }, + { + "id": 30, + "type": "CogVideoTextEncode", + "pos": { + "0": 478, + "1": 90 + }, + "size": [ + 471.90142822265625, + 168.08047485351562 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 54 + }, + { + "name": "prompt", + "type": "STRING", + "link": 163, + "widget": { + "name": "prompt" + } + } + ], + "outputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "links": [ + 116, + 140 + ], + "slot_index": 0, + "shape": 3 + }, + { + "name": "clip", + "type": "CLIP", + "links": [ + 110 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "CogVideoTextEncode" + }, + "widgets_values": [ + "camera orbit around a mouse knight standing in a fantasy forest", + 1, + false + ] + }, + { + "id": 31, + "type": "CogVideoTextEncode", + "pos": { + "0": 493, + "1": 334 + }, + "size": { + "0": 463.01251220703125, + "1": 144 + }, + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 110 + } + ], + "outputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "links": [ + 117, + 141 + ], + "slot_index": 0, + "shape": 3 + }, + { + "name": "clip", + "type": "CLIP", + "links": null + } + ], + "properties": { + "Node name for S&R": "CogVideoTextEncode" + }, + "widgets_values": [ + "The video is not of a high quality, it has a low resolution. Watermark present in each frame. Strange motion trajectory. ", + 1, + true + ] + }, + { + "id": 62, + "type": "CogVideoSampler", + "pos": { + "0": 1258, + "1": 1151 + }, + "size": [ + 330, + 574 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "COGVIDEOMODEL", + "link": 139 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 140 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 141 + }, + { + "name": "samples", + "type": "LATENT", + "link": null, + "shape": 7 + }, + { + "name": "image_cond_latents", + "type": "LATENT", + "link": 147, + "shape": 7 + }, + { + "name": "context_options", + "type": "COGCONTEXT", + "link": null, + "shape": 7 + }, + { + "name": "controlnet", + "type": "COGVIDECONTROLNET", + "link": null, + "shape": 7 + }, + { + "name": "tora_trajectory", + "type": "TORAFEATURES", + "link": null, + "shape": 7 + }, + { + "name": "fastercache", + "type": "FASTERCACHEARGS", + "link": null, + "shape": 7 + }, + { + "name": "num_frames", + "type": "INT", + "link": 143, + "widget": { + "name": "num_frames" + } + }, + { + "name": "seed", + "type": "INT", + "link": 157, + "widget": { + "name": "seed" + } + } + ], + "outputs": [ + { + "name": "samples", + "type": "LATENT", + "links": [ + 148 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CogVideoSampler" + }, + "widgets_values": [ + 33, + 40, + 6, + 458091243358278, + "fixed", + "CogVideoXDDIM", + 1 + ] + }, + { + "id": 64, + "type": "CogVideoDecode", + "pos": { + "0": 1258, + "1": 889 + }, + "size": { + "0": 315, + "1": 198 + }, + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 149 + }, + { + "name": "samples", + "type": "LATENT", + "link": 148 + } + ], + "outputs": [ + { + "name": "images", + "type": "IMAGE", + "links": [ + 150, + 153 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CogVideoDecode" + }, + "widgets_values": [ + true, + 240, + 360, + 0.2, + 0.2, + true + ] + }, + { + "id": 44, + "type": "VHS_VideoCombine", + "pos": { + "0": 1652, + "1": -465 + }, + "size": [ + 592.7721081788095, + 1087.6961669921875 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 131 + }, + { + "name": "audio", + "type": "AUDIO", + "link": null, + "shape": 7 + }, + { + "name": "meta_batch", + "type": "VHS_BatchManager", + "link": null, + "shape": 7 + }, + { + "name": "vae", + "type": "VAE", + "link": null, + "shape": 7 + } + ], + "outputs": [ + { + "name": "Filenames", + "type": "VHS_FILENAMES", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "VHS_VideoCombine" + }, + "widgets_values": { + "frame_rate": 8, + "loop_count": 0, + "filename_prefix": "CogVideoX_Fun", + "format": "video/h264-mp4", + "pix_fmt": "yuv420p", + "crf": 19, + "save_metadata": true, + "pingpong": false, + "save_output": false, + "videopreview": { + "hidden": false, + "paused": false, + "params": { + "filename": "CogVideoX_Fun_00027.mp4", + "subfolder": "", + "type": "temp", + "format": "video/h264-mp4", + "frame_rate": 8 + }, + "muted": false + } + } + }, + { + "id": 65, + "type": "VHS_VideoCombine", + "pos": { + "0": 1674, + "1": 688 + }, + "size": [ + 620.0130829180325, + 1124.0174560546875 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 150 + }, + { + "name": "audio", + "type": "AUDIO", + "link": null, + "shape": 7 + }, + { + "name": "meta_batch", + "type": "VHS_BatchManager", + "link": null, + "shape": 7 + }, + { + "name": "vae", + "type": "VAE", + "link": null, + "shape": 7 + } + ], + "outputs": [ + { + "name": "Filenames", + "type": "VHS_FILENAMES", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "VHS_VideoCombine" + }, + "widgets_values": { + "frame_rate": 8, + "loop_count": 0, + "filename_prefix": "CogVideoX_Fun", + "format": "video/h264-mp4", + "pix_fmt": "yuv420p", + "crf": 19, + "save_metadata": true, + "pingpong": false, + "save_output": false, + "videopreview": { + "hidden": false, + "paused": false, + "params": { + "filename": "CogVideoX_Fun_00026.mp4", + "subfolder": "", + "type": "temp", + "format": "video/h264-mp4", + "frame_rate": 8 + }, + "muted": false + } + } + }, + { + "id": 49, + "type": "DownloadAndLoadCogVideoModel", + "pos": { + "0": 450, + "1": -217 + }, + "size": { + "0": 362.1656799316406, + "1": 218 + }, + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "block_edit", + "type": "TRANSFORMERBLOCKS", + "link": null, + "shape": 7 + }, + { + "name": "lora", + "type": "COGLORA", + "link": 124, + "shape": 7 + }, + { + "name": "compile_args", + "type": "COMPILEARGS", + "link": null, + "shape": 7 + } + ], + "outputs": [ + { + "name": "model", + "type": "COGVIDEOMODEL", + "links": [ + 114, + 139 + ] + }, + { + "name": "vae", + "type": "VAE", + "links": [ + 119, + 122, + 144, + 149 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "DownloadAndLoadCogVideoModel" + }, + "widgets_values": [ + "alibaba-pai/CogVideoX-Fun-V1.1-5b-InP", + "bf16", + "disabled", + false, + "sdpa", + "main_device" + ] + } + ], + "links": [ + [ + 54, + 20, + 0, + 30, + 0, + "CLIP" + ], + [ + 71, + 36, + 0, + 37, + 0, + "IMAGE" + ], + [ + 110, + 30, + 1, + 31, + 0, + "CLIP" + ], + [ + 114, + 49, + 0, + 48, + 0, + "COGVIDEOMODEL" + ], + [ + 116, + 30, + 0, + 48, + 1, + "CONDITIONING" + ], + [ + 117, + 31, + 0, + 48, + 2, + "CONDITIONING" + ], + [ + 119, + 49, + 1, + 50, + 0, + "VAE" + ], + [ + 120, + 50, + 0, + 48, + 4, + "LATENT" + ], + [ + 122, + 49, + 1, + 51, + 0, + "VAE" + ], + [ + 123, + 48, + 0, + 51, + 1, + "LATENT" + ], + [ + 124, + 52, + 0, + 49, + 1, + "COGLORA" + ], + [ + 126, + 53, + 0, + 50, + 3, + "INT" + ], + [ + 127, + 53, + 0, + 48, + 9, + "INT" + ], + [ + 128, + 37, + 0, + 54, + 0, + "IMAGE" + ], + [ + 129, + 54, + 0, + 50, + 1, + "IMAGE" + ], + [ + 130, + 51, + 0, + 55, + 0, + "IMAGE" + ], + [ + 131, + 55, + 0, + 44, + 0, + "IMAGE" + ], + [ + 134, + 58, + 0, + 59, + 0, + "GIMMVIF_MODEL" + ], + [ + 139, + 49, + 0, + 62, + 0, + "COGVIDEOMODEL" + ], + [ + 140, + 30, + 0, + 62, + 1, + "CONDITIONING" + ], + [ + 141, + 31, + 0, + 62, + 2, + "CONDITIONING" + ], + [ + 143, + 53, + 0, + 62, + 9, + "INT" + ], + [ + 144, + 49, + 1, + 63, + 0, + "VAE" + ], + [ + 145, + 53, + 0, + 63, + 3, + "INT" + ], + [ + 146, + 37, + 0, + 63, + 1, + "IMAGE" + ], + [ + 147, + 63, + 0, + 62, + 4, + "LATENT" + ], + [ + 148, + 62, + 0, + 64, + 1, + "LATENT" + ], + [ + 149, + 49, + 1, + 64, + 0, + "VAE" + ], + [ + 150, + 64, + 0, + 65, + 0, + "IMAGE" + ], + [ + 151, + 55, + 0, + 66, + 0, + "IMAGE" + ], + [ + 152, + 66, + 0, + 67, + 0, + "IMAGE" + ], + [ + 153, + 64, + 0, + 67, + 1, + "IMAGE" + ], + [ + 156, + 68, + 0, + 48, + 10, + "INT" + ], + [ + 157, + 68, + 0, + 62, + 10, + "INT" + ], + [ + 158, + 69, + 0, + 70, + 1, + "FL2MODEL" + ], + [ + 159, + 37, + 0, + 70, + 0, + "IMAGE" + ], + [ + 160, + 71, + 0, + 72, + 0, + "STRING" + ], + [ + 161, + 70, + 2, + 73, + 0, + "STRING" + ], + [ + 162, + 70, + 2, + 72, + 1, + "STRING" + ], + [ + 163, + 72, + 0, + 30, + 1, + "STRING" + ], + [ + 164, + 59, + 0, + 60, + 0, + "IMAGE" + ], + [ + 165, + 67, + 0, + 59, + 1, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.47362440744777223, + "offset": [ + 1633.9967545643788, + 525.3824652843582 + ] + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/model_loading.py b/model_loading.py index bc0cd75..695804a 100644 --- a/model_loading.py +++ b/model_loading.py @@ -240,37 +240,37 @@ class DownloadAndLoadCogVideoModel: #LoRAs if lora is not None: - # from .lora_utils import merge_lora#, load_lora_into_transformer - # if "fun" in model.lower(): - # for l in lora: - # log.info(f"Merging LoRA weights from {l['path']} with strength {l['strength']}") - # transformer = merge_lora(transformer, l["path"], l["strength"]) - #else: - adapter_list = [] - adapter_weights = [] - for l in lora: - fuse = True if l["fuse_lora"] else False - lora_sd = load_torch_file(l["path"]) - for key, val in lora_sd.items(): - if "lora_B" in key: - lora_rank = val.shape[1] - break - log.info(f"Merging rank {lora_rank} LoRA weights from {l['path']} with strength {l['strength']}") - adapter_name = l['path'].split("/")[-1].split(".")[0] - adapter_weight = l['strength'] - pipe.load_lora_weights(l['path'], weight_name=l['path'].split("/")[-1], lora_rank=lora_rank, adapter_name=adapter_name) - - #transformer = load_lora_into_transformer(lora, transformer) - adapter_list.append(adapter_name) - adapter_weights.append(adapter_weight) - for l in lora: - pipe.set_adapters(adapter_list, adapter_weights=adapter_weights) - if fuse: - lora_scale = 1 - dimension_loras = ["orbit", "dimensionx"] # for now dimensionx loras need scaling - if any(item in lora[-1]["path"].lower() for item in dimension_loras): - lora_scale = lora_scale / lora_rank - pipe.fuse_lora(lora_scale=lora_scale, components=["transformer"]) + try: + adapter_list = [] + adapter_weights = [] + for l in lora: + fuse = True if l["fuse_lora"] else False + lora_sd = load_torch_file(l["path"]) + for key, val in lora_sd.items(): + if "lora_B" in key: + lora_rank = val.shape[1] + break + log.info(f"Merging rank {lora_rank} LoRA weights from {l['path']} with strength {l['strength']}") + adapter_name = l['path'].split("/")[-1].split(".")[0] + adapter_weight = l['strength'] + pipe.load_lora_weights(l['path'], weight_name=l['path'].split("/")[-1], lora_rank=lora_rank, adapter_name=adapter_name) + + #transformer = load_lora_into_transformer(lora, transformer) + adapter_list.append(adapter_name) + adapter_weights.append(adapter_weight) + for l in lora: + pipe.set_adapters(adapter_list, adapter_weights=adapter_weights) + if fuse: + lora_scale = 1 + dimension_loras = ["orbit", "dimensionx"] # for now dimensionx loras need scaling + if any(item in lora[-1]["path"].lower() for item in dimension_loras): + lora_scale = lora_scale / lora_rank + pipe.fuse_lora(lora_scale=lora_scale, components=["transformer"]) + except: #Fun trainer LoRAs are loaded differently + from .lora_utils import merge_lora + for l in lora: + log.info(f"Merging LoRA weights from {l['path']} with strength {l['strength']}") + transformer = merge_lora(transformer, l["path"], l["strength"]) if "fused" in attention_mode: from diffusers.models.attention import Attention diff --git a/readme.md b/readme.md index 3639341..97c72c1 100644 --- a/readme.md +++ b/readme.md @@ -1,5 +1,32 @@ # WORK IN PROGRESS +## BREAKING Update8 + +This is big one, and unfortunately to do the necessary cleanup and refactoring this will break every old workflow as they are. +I apologize for the inconvenience, if I don't do this now I'll keep making it worse until maintaining becomes too much of a chore, so from my pov there was no choice. + +*Please either use the new workflows or fix the nodes in your old ones before posting issue reports!* + +Old version will be kept in a legacy branch, but not maintained + +- Support CogVideoX 1.5 models +- Major code cleanup (it was bad, still isn't great, wip) +- Merge Fun -model functionality into main pipeline: + - All Fun specific nodes, besides image encode node for Fun -InP models are gone + - Main CogVideo Sampler works with Fun models + - DimensionX LoRAs now work with Fun models as well + +- Remove width/height from the sampler widgets and detect from input instead, this meanst text2vid now requires using empty latents +- Separate VAE from the model, allow using fp32 VAE +- Add ability to load some of the non-GGUF models as single files (only few available for now: https://huggingface.co/Kijai/CogVideoX-comfy) +- Add some torchao quantizations as options +- Add interpolation as option for the main encode node, old interpolation specific node is gone +- torch.compile optimizations +- Remove PAB in favor of FasterCache and cleaner code +- other smaller things I forgot about at this point + +For Fun -model based workflows it's more drastic change, for others migrating generally means re-setting many of the nodes. + ## Update7 - Refactored the Fun version's sampler to accept any resolution, this should make it lot simpler to use with Tora. **BREAKS OLD WORKFLOWS**, old FunSampler nodes need to be remade.