cleanup code, update examples, fix fp8 on the 2b model and with DDIM

This commit is contained in:
kijai 2024-09-03 17:15:06 +03:00
parent ffb9aac826
commit f836f2c24d
5 changed files with 850 additions and 564 deletions

View File

@ -5,10 +5,18 @@
{
"id": 30,
"type": "CogVideoTextEncode",
"pos": [
500,
308
],
"pos": {
"0": 500,
"1": 308,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 474.8450012207031,
"1": 164.7423553466797
@ -44,10 +52,18 @@
{
"id": 20,
"type": "CLIPLoader",
"pos": [
-59,
397
],
"pos": {
"0": -59,
"1": 397,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 451.30548095703125,
"1": 82
@ -55,6 +71,7 @@
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
@ -78,10 +95,18 @@
{
"id": 31,
"type": "CogVideoTextEncode",
"pos": [
503,
521
],
"pos": {
"0": 503,
"1": 521,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 463.01251220703125,
"1": 98.10446166992188
@ -115,110 +140,144 @@
]
},
{
"id": 11,
"type": "CogVideoDecode",
"pos": [
1140,
783
],
"size": {
"0": 210,
"1": 78
"id": 32,
"type": "VHS_VideoCombine",
"pos": {
"0": 1439,
"1": 122,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": [
563.3333740234375,
688.2124814882384
],
"flags": {},
"order": 5,
"order": 7,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "COGVIDEOPIPE",
"link": 37
"name": "images",
"type": "IMAGE",
"link": 60,
"slot_index": 0
},
{
"name": "samples",
"type": "LATENT",
"link": 38
"name": "audio",
"type": "VHS_AUDIO",
"link": null
},
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null
},
{
"name": "vae",
"type": "VAE",
"link": null
}
],
"outputs": [
{
"name": "images",
"type": "IMAGE",
"name": "Filenames",
"type": "VHS_FILENAMES",
"links": null,
"shape": 3
}
],
"properties": {
"Node name for S&R": "VHS_VideoCombine"
},
"widgets_values": {
"frame_rate": 8,
"loop_count": 0,
"filename_prefix": "CogVideo2B_long",
"format": "video/h264-mp4",
"pix_fmt": "yuv420p",
"crf": 19,
"save_metadata": true,
"pingpong": false,
"save_output": false,
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "CogVideo2B_long_00001.mp4",
"subfolder": "",
"type": "temp",
"format": "video/h264-mp4",
"frame_rate": 8
}
}
}
},
{
"id": 1,
"type": "DownloadAndLoadCogVideoModel",
"pos": {
"0": 653,
"1": 90,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 315,
"1": 154
},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "cogvideo_pipe",
"type": "COGVIDEOPIPE",
"links": [
59
36
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoDecode"
"Node name for S&R": "DownloadAndLoadCogVideoModel"
},
"widgets_values": [
"THUDM/CogVideoX-2b",
"fp16",
"disabled",
"disabled",
false
]
},
{
"id": 33,
"type": "GetImageSizeAndCount",
"pos": [
1189,
134
],
"size": {
"0": 210,
"1": 86
},
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 59
}
],
"outputs": [
{
"name": "image",
"type": "IMAGE",
"links": [
60
],
"slot_index": 0,
"shape": 3
},
{
"name": "720 width",
"type": "INT",
"links": null,
"shape": 3
},
{
"name": "480 height",
"type": "INT",
"links": null,
"shape": 3
},
{
"name": "32 count",
"type": "INT",
"links": null,
"shape": 3
}
],
"properties": {
"Node name for S&R": "GetImageSizeAndCount"
}
},
{
"id": 22,
"type": "CogVideoSampler",
"pos": [
1041,
342
],
"pos": {
"0": 1041,
"1": 342,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 315,
"1": 382
@ -278,114 +337,129 @@
6,
6,
806286757407563,
"DDIM",
"DDIM_tiled",
48,
8,
1
]
},
{
"id": 1,
"type": "DownloadAndLoadCogVideoModel",
"pos": [
649,
182
],
"size": {
"0": 315,
"1": 82
"id": 11,
"type": "CogVideoDecode",
"pos": {
"0": 1049,
"1": 772,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": [
295.70112532900725,
198
],
"flags": {},
"order": 1,
"order": 5,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "COGVIDEOPIPE",
"link": 37
},
{
"name": "samples",
"type": "LATENT",
"link": 38
}
],
"outputs": [
{
"name": "cogvideo_pipe",
"type": "COGVIDEOPIPE",
"name": "images",
"type": "IMAGE",
"links": [
36
59
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadCogVideoModel"
"Node name for S&R": "CogVideoDecode"
},
"widgets_values": [
"THUDM/CogVideoX-2b",
"fp16"
true,
96,
96,
0.083,
0.083,
true
]
},
{
"id": 32,
"type": "VHS_VideoCombine",
"pos": [
1439,
122
],
"size": [
563.3333740234375,
310
],
"id": 33,
"type": "GetImageSizeAndCount",
"pos": {
"0": 1176,
"1": 122,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 210,
"1": 86
},
"flags": {},
"order": 7,
"order": 6,
"mode": 0,
"inputs": [
{
"name": "images",
"name": "image",
"type": "IMAGE",
"link": 60,
"slot_index": 0
},
{
"name": "audio",
"type": "VHS_AUDIO",
"link": null
},
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null
},
{
"name": "vae",
"type": "VAE",
"link": null
"link": 59
}
],
"outputs": [
{
"name": "Filenames",
"type": "VHS_FILENAMES",
"name": "image",
"type": "IMAGE",
"links": [
60
],
"slot_index": 0,
"shape": 3
},
{
"name": "728 width",
"type": "INT",
"links": null,
"shape": 3
},
{
"name": "485 height",
"type": "INT",
"links": null,
"shape": 3
},
{
"name": "96 count",
"type": "INT",
"links": null,
"shape": 3
}
],
"properties": {
"Node name for S&R": "VHS_VideoCombine"
},
"widgets_values": {
"frame_rate": 8,
"loop_count": 0,
"filename_prefix": "CogVideo2B_long",
"format": "video/h264-mp4",
"pix_fmt": "yuv420p",
"crf": 19,
"save_metadata": true,
"pingpong": false,
"save_output": false,
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "AnimateDiff_00001.mp4",
"subfolder": "",
"type": "temp",
"format": "video/h264-mp4",
"frame_rate": 8
}
}
"Node name for S&R": "GetImageSizeAndCount"
}
}
],
@ -467,10 +541,10 @@
"config": {},
"extra": {
"ds": {
"scale": 0.8264462809917354,
"scale": 0.7513148009015777,
"offset": [
86.92928825501215,
77.5537144406024
253.3863163213836,
255.76127216744268
]
}
},

View File

@ -5,10 +5,18 @@
{
"id": 20,
"type": "CLIPLoader",
"pos": [
-29,
407
],
"pos": {
"0": -29,
"1": 407,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 451.30548095703125,
"1": 82
@ -16,6 +24,7 @@
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
@ -39,10 +48,18 @@
{
"id": 31,
"type": "CogVideoTextEncode",
"pos": [
503,
521
],
"pos": {
"0": 503,
"1": 521,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 463.01251220703125,
"1": 98.10446166992188
@ -78,10 +95,18 @@
{
"id": 41,
"type": "ImageResizeKJ",
"pos": [
206,
-69
],
"pos": {
"0": 206,
"1": -69,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 315,
"1": 242
@ -154,100 +179,21 @@
"disabled"
]
},
{
"id": 37,
"type": "CogVideoImageEncode",
"pos": [
939,
-53
],
"size": {
"0": 210,
"1": 46
},
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "COGVIDEOPIPE",
"link": 83,
"slot_index": 0
},
{
"name": "image",
"type": "IMAGE",
"link": 129,
"slot_index": 1
}
],
"outputs": [
{
"name": "samples",
"type": "LATENT",
"links": [
172
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoImageEncode"
}
},
{
"id": 11,
"type": "CogVideoDecode",
"pos": [
1224,
737
],
"size": {
"0": 210,
"1": 78
},
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "COGVIDEOPIPE",
"link": 166
},
{
"name": "samples",
"type": "LATENT",
"link": 167
}
],
"outputs": [
{
"name": "images",
"type": "IMAGE",
"links": [
118
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoDecode"
},
"widgets_values": [
false
]
},
{
"id": 30,
"type": "CogVideoTextEncode",
"pos": [
500,
308
],
"pos": {
"0": 500,
"1": 308,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 474.8450012207031,
"1": 164.7423553466797
@ -283,10 +229,18 @@
{
"id": 57,
"type": "GetImageSizeAndCount",
"pos": [
603,
-65
],
"pos": {
"0": 603,
"1": -65,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 202.2143096923828,
"1": 99.23601531982422
@ -332,7 +286,7 @@
"shape": 3
},
{
"name": "32 count",
"name": "33 count",
"type": "INT",
"links": [
178,
@ -349,10 +303,18 @@
{
"id": 45,
"type": "VHS_LoadVideo",
"pos": [
-93,
-153
],
"pos": {
"0": -93,
"1": -153,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": [
235.1999969482422,
359.5999984741211
@ -440,10 +402,18 @@
{
"id": 70,
"type": "GetImageSizeAndCount",
"pos": [
214,
-234
],
"pos": {
"0": 214,
"1": -234,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 202.2143096923828,
"1": 99.23601531982422
@ -484,7 +454,7 @@
"shape": 3
},
{
"name": "32 count",
"name": "33 count",
"type": "INT",
"links": [],
"slot_index": 3,
@ -498,10 +468,18 @@
{
"id": 69,
"type": "INTConstant",
"pos": [
-90,
-305
],
"pos": {
"0": -90,
"1": -305,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 210,
"1": 58
@ -509,6 +487,7 @@
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "value",
@ -529,13 +508,145 @@
"color": "#1b4669",
"bgcolor": "#29699c"
},
{
"id": 47,
"type": "VHS_VideoCombine",
"pos": {
"0": 1560,
"1": -379,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": [
1110,
310
],
"flags": {},
"order": 14,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 132
},
{
"name": "audio",
"type": "VHS_AUDIO",
"link": null
},
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null
},
{
"name": "vae",
"type": "VAE",
"link": null
}
],
"outputs": [
{
"name": "Filenames",
"type": "VHS_FILENAMES",
"links": null,
"shape": 3
}
],
"properties": {
"Node name for S&R": "VHS_VideoCombine"
},
"widgets_values": {
"frame_rate": 8,
"loop_count": 0,
"filename_prefix": "CogVideoX_vid2vid",
"format": "video/h264-mp4",
"pix_fmt": "yuv420p",
"crf": 19,
"save_metadata": true,
"pingpong": false,
"save_output": false,
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "AnimateDiff_00001.mp4",
"subfolder": "",
"type": "temp",
"format": "video/h264-mp4",
"frame_rate": 8
}
}
}
},
{
"id": 1,
"type": "DownloadAndLoadCogVideoModel",
"pos": {
"0": 606,
"1": 85,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 315,
"1": 154
},
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "cogvideo_pipe",
"type": "COGVIDEOPIPE",
"links": [
83,
159
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadCogVideoModel"
},
"widgets_values": [
"THUDM/CogVideoX-5b",
"fp16",
"disabled",
"disabled",
false
]
},
{
"id": 64,
"type": "CogVideoSampler",
"pos": [
1090,
290
],
"pos": {
"0": 1090,
"1": 290,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 312.9841613769531,
"1": 342.8801574707031
@ -627,98 +738,144 @@
6,
9,
"fixed",
"DDIM",
"DPM",
"DDIM",
8,
0.85
]
},
{
"id": 1,
"type": "DownloadAndLoadCogVideoModel",
"pos": [
649,
182
],
"size": {
"0": 315,
"1": 82
"id": 11,
"type": "CogVideoDecode",
"pos": {
"0": 1097,
"1": 681,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": [
301.1664045038119,
198
],
"flags": {},
"order": 2,
"mode": 0,
"outputs": [
{
"name": "cogvideo_pipe",
"type": "COGVIDEOPIPE",
"links": [
83,
159
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadCogVideoModel"
},
"widgets_values": [
"THUDM/CogVideoX-2b",
"fp16"
]
},
{
"id": 58,
"type": "ImageConcanate",
"pos": [
1499,
433
],
"size": {
"0": 315,
"1": 102
},
"flags": {},
"order": 13,
"order": 11,
"mode": 0,
"inputs": [
{
"name": "image1",
"type": "IMAGE",
"link": 191
"name": "pipeline",
"type": "COGVIDEOPIPE",
"link": 166
},
{
"name": "image2",
"type": "IMAGE",
"link": 170
"name": "samples",
"type": "LATENT",
"link": 167
}
],
"outputs": [
{
"name": "IMAGE",
"name": "images",
"type": "IMAGE",
"links": [
132
118
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "ImageConcanate"
"Node name for S&R": "CogVideoDecode"
},
"widgets_values": [
"right",
false
false,
96,
96,
0.083,
0.083,
true
]
},
{
"id": 37,
"type": "CogVideoImageEncode",
"pos": {
"0": 975,
"1": -73,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 210,
"1": 122
},
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "COGVIDEOPIPE",
"link": 83,
"slot_index": 0
},
{
"name": "image",
"type": "IMAGE",
"link": 129,
"slot_index": 1
},
{
"name": "mask",
"type": "MASK",
"link": null
}
],
"outputs": [
{
"name": "samples",
"type": "LATENT",
"links": [
172
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoImageEncode"
},
"widgets_values": [
8,
true
]
},
{
"id": 55,
"type": "GetImageSizeAndCount",
"pos": [
1223,
122
],
"pos": {
"0": 1195,
"1": 154,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 210,
"1": 86
@ -769,75 +926,57 @@
}
},
{
"id": 47,
"type": "VHS_VideoCombine",
"pos": [
1560,
-379
],
"size": [
1110,
711.3333333333333
],
"id": 58,
"type": "ImageConcanate",
"pos": {
"0": 1434,
"1": 289,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 315,
"1": 102
},
"flags": {},
"order": 14,
"order": 13,
"mode": 0,
"inputs": [
{
"name": "images",
"name": "image1",
"type": "IMAGE",
"link": 132
"link": 191
},
{
"name": "audio",
"type": "VHS_AUDIO",
"link": null
},
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null
},
{
"name": "vae",
"type": "VAE",
"link": null
"name": "image2",
"type": "IMAGE",
"link": 170
}
],
"outputs": [
{
"name": "Filenames",
"type": "VHS_FILENAMES",
"links": null,
"name": "IMAGE",
"type": "IMAGE",
"links": [
132
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "VHS_VideoCombine"
"Node name for S&R": "ImageConcanate"
},
"widgets_values": {
"frame_rate": 8,
"loop_count": 0,
"filename_prefix": "CogVideoX_vid2vid",
"format": "video/h264-mp4",
"pix_fmt": "yuv420p",
"bitrate": 10,
"megabit": true,
"save_metadata": true,
"pingpong": false,
"save_output": false,
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "AnimateDiff_00001.mp4",
"subfolder": "",
"type": "temp",
"format": "video/h264-mp4",
"frame_rate": 8
}
}
}
"widgets_values": [
"right",
false
]
}
],
"links": [
@ -1022,10 +1161,10 @@
"config": {},
"extra": {
"ds": {
"scale": 0.620921323059155,
"scale": 0.7513148009015777,
"offset": [
298.59028824596885,
694.562497939138
280.8935954961883,
403.945992992638
]
}
},

View File

@ -5,10 +5,18 @@
{
"id": 31,
"type": "CogVideoTextEncode",
"pos": [
503,
521
],
"pos": {
"0": 503,
"1": 521,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 463.01251220703125,
"1": 98.10446166992188
@ -41,94 +49,25 @@
""
]
},
{
"id": 11,
"type": "CogVideoDecode",
"pos": [
1140,
783
],
"size": {
"0": 210,
"1": 78
},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "COGVIDEOPIPE",
"link": 63
},
{
"name": "samples",
"type": "LATENT",
"link": 64
}
],
"outputs": [
{
"name": "images",
"type": "IMAGE",
"links": [
59
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoDecode"
},
"widgets_values": [
false
]
},
{
"id": 1,
"type": "DownloadAndLoadCogVideoModel",
"pos": [
649,
182
],
"size": {
"0": 315,
"1": 82
},
"flags": {},
"order": 0,
"mode": 0,
"outputs": [
{
"name": "cogvideo_pipe",
"type": "COGVIDEOPIPE",
"links": [
60
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadCogVideoModel"
},
"widgets_values": [
"THUDM/CogVideoX-5b",
"bf16"
]
},
{
"id": 30,
"type": "CogVideoTextEncode",
"pos": [
500,
308
],
"size": [
471.90143257018326,
168.0804709842023
],
"pos": {
"0": 500,
"1": 308,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 471.90142822265625,
"1": 168.08047485351562
},
"flags": {},
"order": 2,
"mode": 0,
@ -160,14 +99,22 @@
{
"id": 34,
"type": "CogVideoSampler",
"pos": [
1041,
342
],
"size": [
315.84047081854465,
358
],
"pos": {
"0": 1041,
"1": 342,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 315.8404846191406,
"1": 358
},
"flags": {},
"order": 4,
"mode": 0,
@ -231,13 +178,21 @@
{
"id": 33,
"type": "VHS_VideoCombine",
"pos": [
1441,
129
],
"pos": {
"0": 1441,
"1": 129,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": [
778.7022705078125,
853.801513671875
310
],
"flags": {},
"order": 6,
@ -281,8 +236,7 @@
"filename_prefix": "CogVideoX5B",
"format": "video/h264-mp4",
"pix_fmt": "yuv420p",
"bitrate": 10,
"megabit": true,
"crf": 19,
"save_metadata": true,
"pingpong": false,
"save_output": false,
@ -303,17 +257,26 @@
{
"id": 20,
"type": "CLIPLoader",
"pos": [
-26,
400
],
"pos": {
"0": -26,
"1": 400,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 451.30548095703125,
"1": 82
},
"flags": {},
"order": 1,
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
@ -333,6 +296,108 @@
"t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
"sd3"
]
},
{
"id": 1,
"type": "DownloadAndLoadCogVideoModel",
"pos": {
"0": 642,
"1": 90,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": {
"0": 315,
"1": 154
},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "cogvideo_pipe",
"type": "COGVIDEOPIPE",
"links": [
60
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadCogVideoModel"
},
"widgets_values": [
"THUDM/CogVideoX-5b",
"bf16",
"disabled",
"disabled",
false
]
},
{
"id": 11,
"type": "CogVideoDecode",
"pos": {
"0": 1051,
"1": 748,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0
},
"size": [
300.3964783563508,
198
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "COGVIDEOPIPE",
"link": 63
},
{
"name": "samples",
"type": "LATENT",
"link": 64
}
],
"outputs": [
{
"name": "images",
"type": "IMAGE",
"links": [
59
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoDecode"
},
"widgets_values": [
false,
96,
96,
0.083,
0.083,
true
]
}
],
"links": [
@ -405,10 +470,10 @@
"config": {},
"extra": {
"ds": {
"scale": 0.7513148009015777,
"scale": 0.8264462809917354,
"offset": [
209.1392882550122,
105.74671444060245
161.910286780368,
124.7586178095323
]
}
},

View File

@ -48,10 +48,6 @@ class DownloadAndLoadCogVideoModel:
mm.soft_empty_cache()
dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[precision]
if fp8_transformer != "disabled":
transformer_dtype = torch.float8_e4m3fn
else:
transformer_dtype = dtype
if "2b" in model:
base_path = os.path.join(folder_paths.models_dir, "CogVideo", "CogVideo2B")
@ -68,12 +64,15 @@ class DownloadAndLoadCogVideoModel:
local_dir=base_path,
local_dir_use_symlinks=False,
)
transformer = CogVideoXTransformer3DModel.from_pretrained(base_path, subfolder="transformer").to(transformer_dtype).to(offload_device)
if fp8_transformer == "fastmode":
from .fp8_optimization import convert_fp8_linear
convert_fp8_linear(transformer, dtype)
if fp8_transformer == "enabled" or fp8_transformer == "fastmode":
transformer = CogVideoXTransformer3DModel.from_pretrained(base_path, subfolder="transformer").to(torch.float8_e4m3fn).to(offload_device)
if fp8_transformer == "fastmode":
from .fp8_optimization import convert_fp8_linear
convert_fp8_linear(transformer, dtype)
else:
transformer = CogVideoXTransformer3DModel.from_pretrained(base_path, subfolder="transformer").to(dtype).to(offload_device)
vae = AutoencoderKLCogVideoX.from_pretrained(base_path, subfolder="vae").to(dtype).to(offload_device)
scheduler = CogVideoXDDIMScheduler.from_pretrained(base_path, subfolder="scheduler")
pipe = CogVideoXPipeline(vae, transformer, scheduler)
@ -95,8 +94,6 @@ class DownloadAndLoadCogVideoModel:
fuse_qkv_projections=True,
)
pipeline = {
"pipe": pipe,
"dtype": dtype,
@ -215,6 +212,8 @@ class CogVideoImageEncode:
# mask = mask.unsqueeze(-1).repeat(1, 1, 1, C)
# print(mask.shape)
# input_image = input_image * (1 -mask)
else:
pipeline["pipe"].original_mask = None
input_image = input_image * 2.0 - 1.0
input_image = input_image.to(vae.dtype).to(device)
@ -265,7 +264,7 @@ class CogVideoSampler:
"steps": ("INT", {"default": 50, "min": 1}),
"cfg": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 30.0, "step": 0.01}),
"seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
"scheduler": (["DDIM", "DPM"], {"tooltip": "5B likes DPM, but it doesn't support temporal tiling"}),
"scheduler": (["DDIM", "DPM", "DDIM_tiled"], {"tooltip": "5B likes DPM, but it doesn't support temporal tiling"}),
"t_tile_length": ("INT", {"default": 16, "min": 2, "max": 128, "step": 1, "tooltip": "Length of temporal tiling, use same alue as num_frames to disable, disabled automatically for DPM"}),
"t_tile_overlap": ("INT", {"default": 8, "min": 2, "max": 128, "step": 1, "tooltip": "Overlap of temporal tiling"}),
},
@ -298,7 +297,7 @@ class CogVideoSampler:
pipe.transformer.to(device)
generator = torch.Generator(device=device).manual_seed(seed)
if scheduler == "DDIM":
if scheduler == "DDIM" or scheduler == "DDIM_tiled":
pipe.scheduler = CogVideoXDDIMScheduler.from_pretrained(base_path, subfolder="scheduler")
elif scheduler == "DPM":
pipe.scheduler = CogVideoXDPMScheduler.from_pretrained(base_path, subfolder="scheduler")
@ -324,7 +323,8 @@ class CogVideoSampler:
prompt_embeds=positive.to(dtype).to(device),
negative_prompt_embeds=negative.to(dtype).to(device),
generator=generator,
device=device
device=device,
scheduler_name=scheduler
)
if not pipeline["cpu_offloading"]:
pipe.transformer.to(offload_device)

View File

@ -332,10 +332,11 @@ class CogVideoXPipeline(DiffusionPipeline):
num_videos_per_prompt: int = 1,
eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None,
prompt_embeds: Optional[torch.FloatTensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
latents: Optional[torch.Tensor] = None,
prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.Tensor] = None,
device = torch.device("cuda"),
scheduler_name: str = "DPM",
):
"""
Function invoked when calling the pipeline for generation.
@ -421,8 +422,11 @@ class CogVideoXPipeline(DiffusionPipeline):
if latents is None and num_frames == t_tile_length:
num_frames += 1
image_latents = latents
original_image_latents = image_latents
if self.original_mask is not None:
image_latents = latents
original_image_latents = image_latents
latents, timesteps, noise = self.prepare_latents(
batch_size * num_videos_per_prompt,
latent_channels,
@ -439,15 +443,9 @@ class CogVideoXPipeline(DiffusionPipeline):
)
latents = latents.to(self.transformer.dtype)
# 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
t_tile_weights = self._gaussian_weights(t_tile_length=t_tile_length, t_batch_size=1).to(latents.device).to(latents.dtype)
print("latents.shape", latents.shape)
print("latents.device", latents.device)
# 6.5. Create rotary embeds if required
image_rotary_emb = (
self._prepare_rotary_positional_embeddings(height, width, latents.size(1), device)
@ -471,15 +469,23 @@ class CogVideoXPipeline(DiffusionPipeline):
# 7. Denoising loop
num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
comfy_pbar = ProgressBar(num_inference_steps)
# 8. Temporal tiling prep
if "tiled" in scheduler_name:
t_tile_weights = self._gaussian_weights(t_tile_length=t_tile_length, t_batch_size=1).to(latents.device).to(self.vae.dtype)
temporal_tiling = True
print("Temporal tiling enabled")
else:
temporal_tiling = False
print("Temporal tiling disabled")
print("latents.shape", latents.shape)
with self.progress_bar(total=num_inference_steps) as progress_bar:
# for DPM-solver++
old_pred_original_sample = None
with self.progress_bar(total=num_inference_steps) as progress_bar:
old_pred_original_sample = None # for DPM-solver++
for i, t in enumerate(timesteps):
if self.interrupt:
continue
if not isinstance(self.scheduler, CogVideoXDPMScheduler):
if temporal_tiling and isinstance(self.scheduler, CogVideoXDDIMScheduler):
# Temporal tiling code based on https://github.com/mayuelala/FollowYourEmoji/blob/main/models/video_pipeline.py
# =====================================================
grid_ts = 0
@ -532,12 +538,12 @@ class CogVideoXPipeline(DiffusionPipeline):
noise_pred = noise_pred_uncond + self._guidance_scale * (noise_pred_text - noise_pred_uncond)
# compute the previous noisy sample x_t -> x_t-1
latents_tile = self.scheduler.step(noise_pred, t, latents_tile, **extra_step_kwargs, return_dict=False)[0]
latents_tile = self.scheduler.step(noise_pred, t, latents_tile.to(self.vae.dtype), **extra_step_kwargs, return_dict=False)[0]
latents_all_list.append(latents_tile)
# ==========================================
latents_all = torch.zeros(latents.shape, device=latents.device, dtype=latents.dtype)
contributors = torch.zeros(latents.shape, device=latents.device, dtype=latents.dtype)
latents_all = torch.zeros(latents.shape, device=latents.device, dtype=self.vae.dtype)
contributors = torch.zeros(latents.shape, device=latents.device, dtype=self.vae.dtype)
# Add each tile contribution to overall latents
for t_i in range(grid_ts):
if t_i < grid_ts - 1:
@ -573,7 +579,6 @@ class CogVideoXPipeline(DiffusionPipeline):
comfy_pbar.update(1)
# ==========================================
else:
latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
@ -590,25 +595,28 @@ class CogVideoXPipeline(DiffusionPipeline):
)[0]
noise_pred = noise_pred.float()
self._guidance_scale = 1 + guidance_scale * (
(1 - math.cos(math.pi * ((num_inference_steps - t.item()) / num_inference_steps) ** 5.0)) / 2
)
if isinstance(self.scheduler, CogVideoXDPMScheduler):
self._guidance_scale = 1 + guidance_scale * (
(1 - math.cos(math.pi * ((num_inference_steps - t.item()) / num_inference_steps) ** 5.0)) / 2
)
if do_classifier_free_guidance:
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + self._guidance_scale * (noise_pred_text - noise_pred_uncond)
# compute the previous noisy sample x_t -> x_t-1
latents, old_pred_original_sample = self.scheduler.step(
noise_pred,
old_pred_original_sample,
t,
timesteps[i - 1] if i > 0 else None,
latents.to(self.vae.dtype),
**extra_step_kwargs,
return_dict=False,
)
if not isinstance(self.scheduler, CogVideoXDPMScheduler):
latents = self.scheduler.step(noise_pred, t, latents.to(self.vae.dtype), **extra_step_kwargs, return_dict=False)[0]
else:
latents, old_pred_original_sample = self.scheduler.step(
noise_pred,
old_pred_original_sample,
t,
timesteps[i - 1] if i > 0 else None,
latents.to(self.vae.dtype),
**extra_step_kwargs,
return_dict=False,
)
# start diff diff (differential diffusion; only active when original_mask is set)
if i < len(timesteps) - 1 and self.original_mask is not None:
noise_timestep = timesteps[i + 1]