mirror of
https://git.datalinker.icu/kijai/ComfyUI-CogVideoXWrapper.git
synced 2026-05-31 14:27:16 +08:00
Refactor Fun sampler to be easier to use with Tora (breaks old workflows!)
The FunSampler node in old workflows needs to be remade. I moved the forced bucket resize to its own node if anyone still wants to use that.
This commit is contained in:
parent
666f7832f9
commit
9202921920
1315
examples/cogvideox_fun_img2vid_tora_01.json
Normal file
1315
examples/cogvideox_fun_img2vid_tora_01.json
Normal file
File diff suppressed because one or more lines are too long
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"last_node_id": 48,
|
"last_node_id": 51,
|
||||||
"last_link_id": 101,
|
"last_link_id": 114,
|
||||||
"nodes": [
|
"nodes": [
|
||||||
{
|
{
|
||||||
"id": 20,
|
"id": 20,
|
||||||
@ -22,8 +22,7 @@
|
|||||||
"name": "CLIP",
|
"name": "CLIP",
|
||||||
"type": "CLIP",
|
"type": "CLIP",
|
||||||
"links": [
|
"links": [
|
||||||
54,
|
54
|
||||||
56
|
|
||||||
],
|
],
|
||||||
"slot_index": 0,
|
"slot_index": 0,
|
||||||
"shape": 3
|
"shape": 3
|
||||||
@ -46,16 +45,16 @@
|
|||||||
},
|
},
|
||||||
"size": {
|
"size": {
|
||||||
"0": 463.01251220703125,
|
"0": 463.01251220703125,
|
||||||
"1": 124
|
"1": 144
|
||||||
},
|
},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 4,
|
"order": 5,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [
|
"inputs": [
|
||||||
{
|
{
|
||||||
"name": "clip",
|
"name": "clip",
|
||||||
"type": "CLIP",
|
"type": "CLIP",
|
||||||
"link": 56
|
"link": 108
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -63,10 +62,15 @@
|
|||||||
"name": "conditioning",
|
"name": "conditioning",
|
||||||
"type": "CONDITIONING",
|
"type": "CONDITIONING",
|
||||||
"links": [
|
"links": [
|
||||||
86
|
111
|
||||||
],
|
],
|
||||||
"slot_index": 0,
|
"slot_index": 0,
|
||||||
"shape": 3
|
"shape": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "clip",
|
||||||
|
"type": "CLIP",
|
||||||
|
"links": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
@ -87,7 +91,7 @@
|
|||||||
},
|
},
|
||||||
"size": [
|
"size": [
|
||||||
855.81494140625,
|
855.81494140625,
|
||||||
927.6441243489584
|
881.2099609375
|
||||||
],
|
],
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 8,
|
"order": 8,
|
||||||
@ -101,17 +105,20 @@
|
|||||||
{
|
{
|
||||||
"name": "audio",
|
"name": "audio",
|
||||||
"type": "AUDIO",
|
"type": "AUDIO",
|
||||||
"link": null
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "meta_batch",
|
"name": "meta_batch",
|
||||||
"type": "VHS_BatchManager",
|
"type": "VHS_BatchManager",
|
||||||
"link": null
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "vae",
|
"name": "vae",
|
||||||
"type": "VAE",
|
"type": "VAE",
|
||||||
"link": null
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -139,7 +146,7 @@
|
|||||||
"hidden": false,
|
"hidden": false,
|
||||||
"paused": false,
|
"paused": false,
|
||||||
"params": {
|
"params": {
|
||||||
"filename": "CogVideoX_Fun_00012.mp4",
|
"filename": "CogVideoX_Fun_00003.mp4",
|
||||||
"subfolder": "",
|
"subfolder": "",
|
||||||
"type": "temp",
|
"type": "temp",
|
||||||
"format": "video/h264-mp4",
|
"format": "video/h264-mp4",
|
||||||
@ -149,61 +156,12 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": 11,
|
|
||||||
"type": "CogVideoDecode",
|
|
||||||
"pos": {
|
|
||||||
"0": 1448,
|
|
||||||
"1": 345
|
|
||||||
},
|
|
||||||
"size": {
|
|
||||||
"0": 300.396484375,
|
|
||||||
"1": 198
|
|
||||||
},
|
|
||||||
"flags": {},
|
|
||||||
"order": 7,
|
|
||||||
"mode": 0,
|
|
||||||
"inputs": [
|
|
||||||
{
|
|
||||||
"name": "pipeline",
|
|
||||||
"type": "COGVIDEOPIPE",
|
|
||||||
"link": 89
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "samples",
|
|
||||||
"type": "LATENT",
|
|
||||||
"link": 88
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "images",
|
|
||||||
"type": "IMAGE",
|
|
||||||
"links": [
|
|
||||||
97
|
|
||||||
],
|
|
||||||
"slot_index": 0,
|
|
||||||
"shape": 3
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"properties": {
|
|
||||||
"Node name for S&R": "CogVideoDecode"
|
|
||||||
},
|
|
||||||
"widgets_values": [
|
|
||||||
true,
|
|
||||||
240,
|
|
||||||
360,
|
|
||||||
0.2,
|
|
||||||
0.2,
|
|
||||||
true
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": 36,
|
"id": 36,
|
||||||
"type": "LoadImage",
|
"type": "LoadImage",
|
||||||
"pos": {
|
"pos": {
|
||||||
"0": 364,
|
"0": 227,
|
||||||
"1": 715
|
"1": 700
|
||||||
},
|
},
|
||||||
"size": {
|
"size": {
|
||||||
"0": 391.3421325683594,
|
"0": 391.3421325683594,
|
||||||
@ -242,15 +200,15 @@
|
|||||||
"id": 37,
|
"id": 37,
|
||||||
"type": "ImageResizeKJ",
|
"type": "ImageResizeKJ",
|
||||||
"pos": {
|
"pos": {
|
||||||
"0": 824,
|
"0": 688,
|
||||||
"1": 715
|
"1": 708
|
||||||
},
|
},
|
||||||
"size": {
|
"size": {
|
||||||
"0": 315,
|
"0": 315,
|
||||||
"1": 266
|
"1": 266
|
||||||
},
|
},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 5,
|
"order": 4,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [
|
"inputs": [
|
||||||
{
|
{
|
||||||
@ -261,7 +219,8 @@
|
|||||||
{
|
{
|
||||||
"name": "get_image_size",
|
"name": "get_image_size",
|
||||||
"type": "IMAGE",
|
"type": "IMAGE",
|
||||||
"link": null
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "width_input",
|
"name": "width_input",
|
||||||
@ -285,7 +244,7 @@
|
|||||||
"name": "IMAGE",
|
"name": "IMAGE",
|
||||||
"type": "IMAGE",
|
"type": "IMAGE",
|
||||||
"links": [
|
"links": [
|
||||||
87
|
112
|
||||||
],
|
],
|
||||||
"slot_index": 0,
|
"slot_index": 0,
|
||||||
"shape": 3
|
"shape": 3
|
||||||
@ -317,6 +276,55 @@
|
|||||||
"disabled"
|
"disabled"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": 11,
|
||||||
|
"type": "CogVideoDecode",
|
||||||
|
"pos": {
|
||||||
|
"0": 1477,
|
||||||
|
"1": 344
|
||||||
|
},
|
||||||
|
"size": {
|
||||||
|
"0": 300.396484375,
|
||||||
|
"1": 198
|
||||||
|
},
|
||||||
|
"flags": {},
|
||||||
|
"order": 7,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "pipeline",
|
||||||
|
"type": "COGVIDEOPIPE",
|
||||||
|
"link": 113
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "samples",
|
||||||
|
"type": "LATENT",
|
||||||
|
"link": 114
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "images",
|
||||||
|
"type": "IMAGE",
|
||||||
|
"links": [
|
||||||
|
97
|
||||||
|
],
|
||||||
|
"slot_index": 0,
|
||||||
|
"shape": 3
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"Node name for S&R": "CogVideoDecode"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
true,
|
||||||
|
240,
|
||||||
|
360,
|
||||||
|
0.2,
|
||||||
|
0.2,
|
||||||
|
true
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": 30,
|
"id": 30,
|
||||||
"type": "CogVideoTextEncode",
|
"type": "CogVideoTextEncode",
|
||||||
@ -343,10 +351,18 @@
|
|||||||
"name": "conditioning",
|
"name": "conditioning",
|
||||||
"type": "CONDITIONING",
|
"type": "CONDITIONING",
|
||||||
"links": [
|
"links": [
|
||||||
85
|
110
|
||||||
],
|
],
|
||||||
"slot_index": 0,
|
"slot_index": 0,
|
||||||
"shape": 3
|
"shape": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "clip",
|
||||||
|
"type": "CLIP",
|
||||||
|
"links": [
|
||||||
|
108
|
||||||
|
],
|
||||||
|
"slot_index": 1
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
@ -355,55 +371,19 @@
|
|||||||
"widgets_values": [
|
"widgets_values": [
|
||||||
"majestic stag grazing in a forest and basking in the setting sun",
|
"majestic stag grazing in a forest and basking in the setting sun",
|
||||||
1,
|
1,
|
||||||
true
|
false
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 48,
|
"id": 51,
|
||||||
"type": "DownloadAndLoadCogVideoGGUFModel",
|
|
||||||
"pos": {
|
|
||||||
"0": 584,
|
|
||||||
"1": 103
|
|
||||||
},
|
|
||||||
"size": {
|
|
||||||
"0": 378,
|
|
||||||
"1": 130
|
|
||||||
},
|
|
||||||
"flags": {},
|
|
||||||
"order": 2,
|
|
||||||
"mode": 0,
|
|
||||||
"inputs": [],
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "cogvideo_pipe",
|
|
||||||
"type": "COGVIDEOPIPE",
|
|
||||||
"links": [
|
|
||||||
101
|
|
||||||
],
|
|
||||||
"shape": 3,
|
|
||||||
"slot_index": 0
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"properties": {
|
|
||||||
"Node name for S&R": "DownloadAndLoadCogVideoGGUFModel"
|
|
||||||
},
|
|
||||||
"widgets_values": [
|
|
||||||
"CogVideoX_5b_fun_GGUF_Q4_0.safetensors",
|
|
||||||
"bf16",
|
|
||||||
false,
|
|
||||||
"offload_device"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": 41,
|
|
||||||
"type": "CogVideoXFunSampler",
|
"type": "CogVideoXFunSampler",
|
||||||
"pos": {
|
"pos": {
|
||||||
"0": 1058,
|
"0": 1058,
|
||||||
"1": 345
|
"1": 345
|
||||||
},
|
},
|
||||||
"size": {
|
"size": {
|
||||||
"0": 315,
|
"0": 367.79998779296875,
|
||||||
"1": 302
|
"1": 434
|
||||||
},
|
},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 6,
|
"order": 6,
|
||||||
@ -412,32 +392,53 @@
|
|||||||
{
|
{
|
||||||
"name": "pipeline",
|
"name": "pipeline",
|
||||||
"type": "COGVIDEOPIPE",
|
"type": "COGVIDEOPIPE",
|
||||||
"link": 101
|
"link": 109
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "positive",
|
"name": "positive",
|
||||||
"type": "CONDITIONING",
|
"type": "CONDITIONING",
|
||||||
"link": 85
|
"link": 110
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "negative",
|
"name": "negative",
|
||||||
"type": "CONDITIONING",
|
"type": "CONDITIONING",
|
||||||
"link": 86
|
"link": 111
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "start_img",
|
"name": "start_img",
|
||||||
"type": "IMAGE",
|
"type": "IMAGE",
|
||||||
"link": 87
|
"link": 112,
|
||||||
|
"shape": 7
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "end_img",
|
"name": "end_img",
|
||||||
"type": "IMAGE",
|
"type": "IMAGE",
|
||||||
"link": null
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "opt_empty_latent",
|
"name": "context_options",
|
||||||
"type": "LATENT",
|
"type": "COGCONTEXT",
|
||||||
"link": null
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "tora_trajectory",
|
||||||
|
"type": "TORAFEATURES",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "fastercache",
|
||||||
|
"type": "FASTERCACHEARGS",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "vid2vid_images",
|
||||||
|
"type": "IMAGE",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -445,18 +446,15 @@
|
|||||||
"name": "cogvideo_pipe",
|
"name": "cogvideo_pipe",
|
||||||
"type": "COGVIDEOPIPE",
|
"type": "COGVIDEOPIPE",
|
||||||
"links": [
|
"links": [
|
||||||
89
|
113
|
||||||
],
|
]
|
||||||
"slot_index": 0,
|
|
||||||
"shape": 3
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "samples",
|
"name": "samples",
|
||||||
"type": "LATENT",
|
"type": "LATENT",
|
||||||
"links": [
|
"links": [
|
||||||
88
|
114
|
||||||
],
|
]
|
||||||
"shape": 3
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
@ -464,12 +462,66 @@
|
|||||||
},
|
},
|
||||||
"widgets_values": [
|
"widgets_values": [
|
||||||
49,
|
49,
|
||||||
512,
|
720,
|
||||||
44,
|
480,
|
||||||
"fixed",
|
43,
|
||||||
30,
|
"randomize",
|
||||||
|
50,
|
||||||
6,
|
6,
|
||||||
"CogVideoXDPMScheduler"
|
"DDIM",
|
||||||
|
0.0563,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 48,
|
||||||
|
"type": "DownloadAndLoadCogVideoGGUFModel",
|
||||||
|
"pos": {
|
||||||
|
"0": 585,
|
||||||
|
"1": 34
|
||||||
|
},
|
||||||
|
"size": {
|
||||||
|
"0": 378,
|
||||||
|
"1": 198
|
||||||
|
},
|
||||||
|
"flags": {},
|
||||||
|
"order": 2,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "pab_config",
|
||||||
|
"type": "PAB_CONFIG",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "block_edit",
|
||||||
|
"type": "TRANSFORMERBLOCKS",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "cogvideo_pipe",
|
||||||
|
"type": "COGVIDEOPIPE",
|
||||||
|
"links": [
|
||||||
|
109
|
||||||
|
],
|
||||||
|
"slot_index": 0,
|
||||||
|
"shape": 3
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"Node name for S&R": "DownloadAndLoadCogVideoGGUFModel"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
"CogVideoX_5b_fun_1_1_GGUF_Q4_0.safetensors",
|
||||||
|
"bf16",
|
||||||
|
false,
|
||||||
|
"offload_device",
|
||||||
|
false,
|
||||||
|
"disabled"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -482,14 +534,6 @@
|
|||||||
0,
|
0,
|
||||||
"CLIP"
|
"CLIP"
|
||||||
],
|
],
|
||||||
[
|
|
||||||
56,
|
|
||||||
20,
|
|
||||||
0,
|
|
||||||
31,
|
|
||||||
0,
|
|
||||||
"CLIP"
|
|
||||||
],
|
|
||||||
[
|
[
|
||||||
71,
|
71,
|
||||||
36,
|
36,
|
||||||
@ -498,46 +542,6 @@
|
|||||||
0,
|
0,
|
||||||
"IMAGE"
|
"IMAGE"
|
||||||
],
|
],
|
||||||
[
|
|
||||||
85,
|
|
||||||
30,
|
|
||||||
0,
|
|
||||||
41,
|
|
||||||
1,
|
|
||||||
"CONDITIONING"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
86,
|
|
||||||
31,
|
|
||||||
0,
|
|
||||||
41,
|
|
||||||
2,
|
|
||||||
"CONDITIONING"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
87,
|
|
||||||
37,
|
|
||||||
0,
|
|
||||||
41,
|
|
||||||
3,
|
|
||||||
"IMAGE"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
88,
|
|
||||||
41,
|
|
||||||
1,
|
|
||||||
11,
|
|
||||||
1,
|
|
||||||
"LATENT"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
89,
|
|
||||||
41,
|
|
||||||
0,
|
|
||||||
11,
|
|
||||||
0,
|
|
||||||
"COGVIDEOPIPE"
|
|
||||||
],
|
|
||||||
[
|
[
|
||||||
97,
|
97,
|
||||||
11,
|
11,
|
||||||
@ -547,22 +551,70 @@
|
|||||||
"IMAGE"
|
"IMAGE"
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
101,
|
108,
|
||||||
|
30,
|
||||||
|
1,
|
||||||
|
31,
|
||||||
|
0,
|
||||||
|
"CLIP"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
109,
|
||||||
48,
|
48,
|
||||||
0,
|
0,
|
||||||
41,
|
51,
|
||||||
0,
|
0,
|
||||||
"COGVIDEOPIPE"
|
"COGVIDEOPIPE"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
110,
|
||||||
|
30,
|
||||||
|
0,
|
||||||
|
51,
|
||||||
|
1,
|
||||||
|
"CONDITIONING"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
111,
|
||||||
|
31,
|
||||||
|
0,
|
||||||
|
51,
|
||||||
|
2,
|
||||||
|
"CONDITIONING"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
112,
|
||||||
|
37,
|
||||||
|
0,
|
||||||
|
51,
|
||||||
|
3,
|
||||||
|
"IMAGE"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
113,
|
||||||
|
51,
|
||||||
|
0,
|
||||||
|
11,
|
||||||
|
0,
|
||||||
|
"COGVIDEOPIPE"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
114,
|
||||||
|
51,
|
||||||
|
1,
|
||||||
|
11,
|
||||||
|
1,
|
||||||
|
"LATENT"
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"groups": [],
|
"groups": [],
|
||||||
"config": {},
|
"config": {},
|
||||||
"extra": {
|
"extra": {
|
||||||
"ds": {
|
"ds": {
|
||||||
"scale": 0.7627768444385654,
|
"scale": 0.7513148009015784,
|
||||||
"offset": [
|
"offset": [
|
||||||
62.58315607223924,
|
724.7448506313632,
|
||||||
102.05205752424705
|
128.336592104936
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"last_node_id": 45,
|
"last_node_id": 47,
|
||||||
"last_link_id": 97,
|
"last_link_id": 110,
|
||||||
"nodes": [
|
"nodes": [
|
||||||
{
|
{
|
||||||
"id": 20,
|
"id": 20,
|
||||||
@ -22,8 +22,7 @@
|
|||||||
"name": "CLIP",
|
"name": "CLIP",
|
||||||
"type": "CLIP",
|
"type": "CLIP",
|
||||||
"links": [
|
"links": [
|
||||||
54,
|
54
|
||||||
56
|
|
||||||
],
|
],
|
||||||
"slot_index": 0,
|
"slot_index": 0,
|
||||||
"shape": 3
|
"shape": 3
|
||||||
@ -37,85 +36,6 @@
|
|||||||
"sd3"
|
"sd3"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": 37,
|
|
||||||
"type": "ImageResizeKJ",
|
|
||||||
"pos": {
|
|
||||||
"0": 824,
|
|
||||||
"1": 715
|
|
||||||
},
|
|
||||||
"size": {
|
|
||||||
"0": 315,
|
|
||||||
"1": 266
|
|
||||||
},
|
|
||||||
"flags": {},
|
|
||||||
"order": 5,
|
|
||||||
"mode": 0,
|
|
||||||
"inputs": [
|
|
||||||
{
|
|
||||||
"name": "image",
|
|
||||||
"type": "IMAGE",
|
|
||||||
"link": 71
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "get_image_size",
|
|
||||||
"type": "IMAGE",
|
|
||||||
"link": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "width_input",
|
|
||||||
"type": "INT",
|
|
||||||
"link": null,
|
|
||||||
"widget": {
|
|
||||||
"name": "width_input"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "height_input",
|
|
||||||
"type": "INT",
|
|
||||||
"link": null,
|
|
||||||
"widget": {
|
|
||||||
"name": "height_input"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "IMAGE",
|
|
||||||
"type": "IMAGE",
|
|
||||||
"links": [
|
|
||||||
87
|
|
||||||
],
|
|
||||||
"slot_index": 0,
|
|
||||||
"shape": 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "width",
|
|
||||||
"type": "INT",
|
|
||||||
"links": null,
|
|
||||||
"shape": 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "height",
|
|
||||||
"type": "INT",
|
|
||||||
"links": null,
|
|
||||||
"shape": 3
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"properties": {
|
|
||||||
"Node name for S&R": "ImageResizeKJ"
|
|
||||||
},
|
|
||||||
"widgets_values": [
|
|
||||||
720,
|
|
||||||
480,
|
|
||||||
"nearest-exact",
|
|
||||||
false,
|
|
||||||
2,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
"disabled"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": 11,
|
"id": 11,
|
||||||
"type": "CogVideoDecode",
|
"type": "CogVideoDecode",
|
||||||
@ -134,12 +54,12 @@
|
|||||||
{
|
{
|
||||||
"name": "pipeline",
|
"name": "pipeline",
|
||||||
"type": "COGVIDEOPIPE",
|
"type": "COGVIDEOPIPE",
|
||||||
"link": 89
|
"link": 108
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "samples",
|
"name": "samples",
|
||||||
"type": "LATENT",
|
"type": "LATENT",
|
||||||
"link": 88
|
"link": 109
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -165,43 +85,6 @@
|
|||||||
true
|
true
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": 1,
|
|
||||||
"type": "DownloadAndLoadCogVideoModel",
|
|
||||||
"pos": {
|
|
||||||
"0": 642,
|
|
||||||
"1": 90
|
|
||||||
},
|
|
||||||
"size": {
|
|
||||||
"0": 337.8885192871094,
|
|
||||||
"1": 154
|
|
||||||
},
|
|
||||||
"flags": {},
|
|
||||||
"order": 1,
|
|
||||||
"mode": 0,
|
|
||||||
"inputs": [],
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "cogvideo_pipe",
|
|
||||||
"type": "COGVIDEOPIPE",
|
|
||||||
"links": [
|
|
||||||
84
|
|
||||||
],
|
|
||||||
"slot_index": 0,
|
|
||||||
"shape": 3
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"properties": {
|
|
||||||
"Node name for S&R": "DownloadAndLoadCogVideoModel"
|
|
||||||
},
|
|
||||||
"widgets_values": [
|
|
||||||
"kijai/CogVideoX-Fun-5b",
|
|
||||||
"bf16",
|
|
||||||
"disabled",
|
|
||||||
"disabled",
|
|
||||||
false
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": 31,
|
"id": 31,
|
||||||
"type": "CogVideoTextEncode",
|
"type": "CogVideoTextEncode",
|
||||||
@ -211,16 +94,16 @@
|
|||||||
},
|
},
|
||||||
"size": {
|
"size": {
|
||||||
"0": 463.01251220703125,
|
"0": 463.01251220703125,
|
||||||
"1": 98.10446166992188
|
"1": 144
|
||||||
},
|
},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 4,
|
"order": 5,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [
|
"inputs": [
|
||||||
{
|
{
|
||||||
"name": "clip",
|
"name": "clip",
|
||||||
"type": "CLIP",
|
"type": "CLIP",
|
||||||
"link": 56
|
"link": 110
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -228,17 +111,24 @@
|
|||||||
"name": "conditioning",
|
"name": "conditioning",
|
||||||
"type": "CONDITIONING",
|
"type": "CONDITIONING",
|
||||||
"links": [
|
"links": [
|
||||||
86
|
106
|
||||||
],
|
],
|
||||||
"slot_index": 0,
|
"slot_index": 0,
|
||||||
"shape": 3
|
"shape": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "clip",
|
||||||
|
"type": "CLIP",
|
||||||
|
"links": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
"Node name for S&R": "CogVideoTextEncode"
|
"Node name for S&R": "CogVideoTextEncode"
|
||||||
},
|
},
|
||||||
"widgets_values": [
|
"widgets_values": [
|
||||||
"The video is not of a high quality, it has a low resolution. Watermark present in each frame. Strange motion trajectory. "
|
"The video is not of a high quality, it has a low resolution. Watermark present in each frame. Strange motion trajectory. ",
|
||||||
|
1,
|
||||||
|
true
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -249,8 +139,8 @@
|
|||||||
"1": 345
|
"1": 345
|
||||||
},
|
},
|
||||||
"size": [
|
"size": [
|
||||||
605.3909898931465,
|
605.3909912109375,
|
||||||
724.5306772953109
|
714.2606608072917
|
||||||
],
|
],
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 8,
|
"order": 8,
|
||||||
@ -264,17 +154,20 @@
|
|||||||
{
|
{
|
||||||
"name": "audio",
|
"name": "audio",
|
||||||
"type": "AUDIO",
|
"type": "AUDIO",
|
||||||
"link": null
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "meta_batch",
|
"name": "meta_batch",
|
||||||
"type": "VHS_BatchManager",
|
"type": "VHS_BatchManager",
|
||||||
"link": null
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "vae",
|
"name": "vae",
|
||||||
"type": "VAE",
|
"type": "VAE",
|
||||||
"link": null
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -302,7 +195,7 @@
|
|||||||
"hidden": false,
|
"hidden": false,
|
||||||
"paused": false,
|
"paused": false,
|
||||||
"params": {
|
"params": {
|
||||||
"filename": "CogVideoX_Fun_00003.mp4",
|
"filename": "CogVideoX_Fun_00001.mp4",
|
||||||
"subfolder": "",
|
"subfolder": "",
|
||||||
"type": "temp",
|
"type": "temp",
|
||||||
"format": "video/h264-mp4",
|
"format": "video/h264-mp4",
|
||||||
@ -313,15 +206,191 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 41,
|
"id": 36,
|
||||||
"type": "CogVideoXFunSampler",
|
"type": "LoadImage",
|
||||||
"pos": {
|
"pos": {
|
||||||
"0": 1058,
|
"0": 325,
|
||||||
"1": 345
|
"1": 715
|
||||||
|
},
|
||||||
|
"size": {
|
||||||
|
"0": 432.4361877441406,
|
||||||
|
"1": 361.0254211425781
|
||||||
|
},
|
||||||
|
"flags": {},
|
||||||
|
"order": 1,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "IMAGE",
|
||||||
|
"type": "IMAGE",
|
||||||
|
"links": [
|
||||||
|
71
|
||||||
|
],
|
||||||
|
"slot_index": 0,
|
||||||
|
"shape": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "MASK",
|
||||||
|
"type": "MASK",
|
||||||
|
"links": null,
|
||||||
|
"shape": 3
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"Node name for S&R": "LoadImage"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
"6e1a7befce6daa63fc01cb66c1a22ed0.jpg",
|
||||||
|
"image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"type": "DownloadAndLoadCogVideoModel",
|
||||||
|
"pos": {
|
||||||
|
"0": 602,
|
||||||
|
"1": 53
|
||||||
|
},
|
||||||
|
"size": {
|
||||||
|
"0": 337.8885192871094,
|
||||||
|
"1": 194
|
||||||
|
},
|
||||||
|
"flags": {},
|
||||||
|
"order": 2,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "pab_config",
|
||||||
|
"type": "PAB_CONFIG",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "block_edit",
|
||||||
|
"type": "TRANSFORMERBLOCKS",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "lora",
|
||||||
|
"type": "COGLORA",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "cogvideo_pipe",
|
||||||
|
"type": "COGVIDEOPIPE",
|
||||||
|
"links": [
|
||||||
|
104
|
||||||
|
],
|
||||||
|
"slot_index": 0,
|
||||||
|
"shape": 3
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"Node name for S&R": "DownloadAndLoadCogVideoModel"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
"kijai/CogVideoX-Fun-5b",
|
||||||
|
"bf16",
|
||||||
|
"disabled",
|
||||||
|
"disabled",
|
||||||
|
false
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 37,
|
||||||
|
"type": "ImageResizeKJ",
|
||||||
|
"pos": {
|
||||||
|
"0": 824,
|
||||||
|
"1": 715
|
||||||
},
|
},
|
||||||
"size": {
|
"size": {
|
||||||
"0": 315,
|
"0": 315,
|
||||||
"1": 282
|
"1": 266
|
||||||
|
},
|
||||||
|
"flags": {},
|
||||||
|
"order": 4,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "image",
|
||||||
|
"type": "IMAGE",
|
||||||
|
"link": 71
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "get_image_size",
|
||||||
|
"type": "IMAGE",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "width_input",
|
||||||
|
"type": "INT",
|
||||||
|
"link": null,
|
||||||
|
"widget": {
|
||||||
|
"name": "width_input"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "height_input",
|
||||||
|
"type": "INT",
|
||||||
|
"link": null,
|
||||||
|
"widget": {
|
||||||
|
"name": "height_input"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "IMAGE",
|
||||||
|
"type": "IMAGE",
|
||||||
|
"links": [
|
||||||
|
107
|
||||||
|
],
|
||||||
|
"slot_index": 0,
|
||||||
|
"shape": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "width",
|
||||||
|
"type": "INT",
|
||||||
|
"links": null,
|
||||||
|
"shape": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "height",
|
||||||
|
"type": "INT",
|
||||||
|
"links": null,
|
||||||
|
"shape": 3
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"Node name for S&R": "ImageResizeKJ"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
720,
|
||||||
|
480,
|
||||||
|
"lanczos",
|
||||||
|
false,
|
||||||
|
2,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
"disabled"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 47,
|
||||||
|
"type": "CogVideoXFunSampler",
|
||||||
|
"pos": {
|
||||||
|
"0": 1068,
|
||||||
|
"1": 198
|
||||||
|
},
|
||||||
|
"size": {
|
||||||
|
"0": 367.79998779296875,
|
||||||
|
"1": 434
|
||||||
},
|
},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 6,
|
"order": 6,
|
||||||
@ -330,27 +399,53 @@
|
|||||||
{
|
{
|
||||||
"name": "pipeline",
|
"name": "pipeline",
|
||||||
"type": "COGVIDEOPIPE",
|
"type": "COGVIDEOPIPE",
|
||||||
"link": 84
|
"link": 104
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "positive",
|
"name": "positive",
|
||||||
"type": "CONDITIONING",
|
"type": "CONDITIONING",
|
||||||
"link": 85
|
"link": 105
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "negative",
|
"name": "negative",
|
||||||
"type": "CONDITIONING",
|
"type": "CONDITIONING",
|
||||||
"link": 86
|
"link": 106
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "start_img",
|
"name": "start_img",
|
||||||
"type": "IMAGE",
|
"type": "IMAGE",
|
||||||
"link": 87
|
"link": 107,
|
||||||
|
"shape": 7
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "end_img",
|
"name": "end_img",
|
||||||
"type": "IMAGE",
|
"type": "IMAGE",
|
||||||
"link": null
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "context_options",
|
||||||
|
"type": "COGCONTEXT",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "tora_trajectory",
|
||||||
|
"type": "TORAFEATURES",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "fastercache",
|
||||||
|
"type": "FASTERCACHEARGS",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "vid2vid_images",
|
||||||
|
"type": "IMAGE",
|
||||||
|
"link": null,
|
||||||
|
"shape": 7
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -358,18 +453,15 @@
|
|||||||
"name": "cogvideo_pipe",
|
"name": "cogvideo_pipe",
|
||||||
"type": "COGVIDEOPIPE",
|
"type": "COGVIDEOPIPE",
|
||||||
"links": [
|
"links": [
|
||||||
89
|
108
|
||||||
],
|
]
|
||||||
"slot_index": 0,
|
|
||||||
"shape": 3
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "samples",
|
"name": "samples",
|
||||||
"type": "LATENT",
|
"type": "LATENT",
|
||||||
"links": [
|
"links": [
|
||||||
88
|
109
|
||||||
],
|
]
|
||||||
"shape": 3
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
@ -377,12 +469,15 @@
|
|||||||
},
|
},
|
||||||
"widgets_values": [
|
"widgets_values": [
|
||||||
49,
|
49,
|
||||||
512,
|
720,
|
||||||
|
480,
|
||||||
43,
|
43,
|
||||||
"fixed",
|
"fixed",
|
||||||
30,
|
50,
|
||||||
6,
|
6,
|
||||||
"DPM++"
|
"DDIM",
|
||||||
|
0.0563,
|
||||||
|
1
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -411,57 +506,27 @@
|
|||||||
"name": "conditioning",
|
"name": "conditioning",
|
||||||
"type": "CONDITIONING",
|
"type": "CONDITIONING",
|
||||||
"links": [
|
"links": [
|
||||||
85
|
105
|
||||||
],
|
],
|
||||||
"slot_index": 0,
|
"slot_index": 0,
|
||||||
"shape": 3
|
"shape": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "clip",
|
||||||
|
"type": "CLIP",
|
||||||
|
"links": [
|
||||||
|
110
|
||||||
|
],
|
||||||
|
"slot_index": 1
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
"Node name for S&R": "CogVideoTextEncode"
|
"Node name for S&R": "CogVideoTextEncode"
|
||||||
},
|
},
|
||||||
"widgets_values": [
|
"widgets_values": [
|
||||||
"fireworks display over night city. The video is of high quality, and the view is very clear. High quality, masterpiece, best quality, highres, ultra-detailed, fantastic."
|
"fireworks display over night city. The video is of high quality, and the view is very clear. High quality, masterpiece, best quality, highres, ultra-detailed, fantastic.",
|
||||||
]
|
1,
|
||||||
},
|
false
|
||||||
{
|
|
||||||
"id": 36,
|
|
||||||
"type": "LoadImage",
|
|
||||||
"pos": {
|
|
||||||
"0": 325,
|
|
||||||
"1": 715
|
|
||||||
},
|
|
||||||
"size": {
|
|
||||||
"0": 432.4361877441406,
|
|
||||||
"1": 361.0254211425781
|
|
||||||
},
|
|
||||||
"flags": {},
|
|
||||||
"order": 2,
|
|
||||||
"mode": 0,
|
|
||||||
"inputs": [],
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "IMAGE",
|
|
||||||
"type": "IMAGE",
|
|
||||||
"links": [
|
|
||||||
71
|
|
||||||
],
|
|
||||||
"slot_index": 0,
|
|
||||||
"shape": 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "MASK",
|
|
||||||
"type": "MASK",
|
|
||||||
"links": null,
|
|
||||||
"shape": 3
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"properties": {
|
|
||||||
"Node name for S&R": "LoadImage"
|
|
||||||
},
|
|
||||||
"widgets_values": [
|
|
||||||
"6e1a7befce6daa63fc01cb66c1a22ed0.jpg",
|
|
||||||
"image"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -474,14 +539,6 @@
|
|||||||
0,
|
0,
|
||||||
"CLIP"
|
"CLIP"
|
||||||
],
|
],
|
||||||
[
|
|
||||||
56,
|
|
||||||
20,
|
|
||||||
0,
|
|
||||||
31,
|
|
||||||
0,
|
|
||||||
"CLIP"
|
|
||||||
],
|
|
||||||
[
|
[
|
||||||
71,
|
71,
|
||||||
36,
|
36,
|
||||||
@ -490,54 +547,6 @@
|
|||||||
0,
|
0,
|
||||||
"IMAGE"
|
"IMAGE"
|
||||||
],
|
],
|
||||||
[
|
|
||||||
84,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
41,
|
|
||||||
0,
|
|
||||||
"COGVIDEOPIPE"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
85,
|
|
||||||
30,
|
|
||||||
0,
|
|
||||||
41,
|
|
||||||
1,
|
|
||||||
"CONDITIONING"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
86,
|
|
||||||
31,
|
|
||||||
0,
|
|
||||||
41,
|
|
||||||
2,
|
|
||||||
"CONDITIONING"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
87,
|
|
||||||
37,
|
|
||||||
0,
|
|
||||||
41,
|
|
||||||
3,
|
|
||||||
"IMAGE"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
88,
|
|
||||||
41,
|
|
||||||
1,
|
|
||||||
11,
|
|
||||||
1,
|
|
||||||
"LATENT"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
89,
|
|
||||||
41,
|
|
||||||
0,
|
|
||||||
11,
|
|
||||||
0,
|
|
||||||
"COGVIDEOPIPE"
|
|
||||||
],
|
|
||||||
[
|
[
|
||||||
97,
|
97,
|
||||||
11,
|
11,
|
||||||
@ -545,16 +554,72 @@
|
|||||||
44,
|
44,
|
||||||
0,
|
0,
|
||||||
"IMAGE"
|
"IMAGE"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
104,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
47,
|
||||||
|
0,
|
||||||
|
"COGVIDEOPIPE"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
105,
|
||||||
|
30,
|
||||||
|
0,
|
||||||
|
47,
|
||||||
|
1,
|
||||||
|
"CONDITIONING"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
106,
|
||||||
|
31,
|
||||||
|
0,
|
||||||
|
47,
|
||||||
|
2,
|
||||||
|
"CONDITIONING"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
107,
|
||||||
|
37,
|
||||||
|
0,
|
||||||
|
47,
|
||||||
|
3,
|
||||||
|
"IMAGE"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
108,
|
||||||
|
47,
|
||||||
|
0,
|
||||||
|
11,
|
||||||
|
0,
|
||||||
|
"COGVIDEOPIPE"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
109,
|
||||||
|
47,
|
||||||
|
1,
|
||||||
|
11,
|
||||||
|
1,
|
||||||
|
"LATENT"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
110,
|
||||||
|
30,
|
||||||
|
1,
|
||||||
|
31,
|
||||||
|
0,
|
||||||
|
"CLIP"
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"groups": [],
|
"groups": [],
|
||||||
"config": {},
|
"config": {},
|
||||||
"extra": {
|
"extra": {
|
||||||
"ds": {
|
"ds": {
|
||||||
"scale": 0.8264462809917361,
|
"scale": 0.8264462809917363,
|
||||||
"offset": [
|
"offset": [
|
||||||
97.64239267521098,
|
245.90746806300405,
|
||||||
39.894747674006986
|
108.93624646284617
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
341
nodes.py
341
nodes.py
@ -101,7 +101,33 @@ class CogVideoPABConfig:
|
|||||||
|
|
||||||
return (pab_config, )
|
return (pab_config, )
|
||||||
|
|
||||||
|
class CogVideoContextOptions:
|
||||||
|
@classmethod
|
||||||
|
def INPUT_TYPES(s):
|
||||||
|
return {"required": {
|
||||||
|
"context_schedule": (["uniform_standard", "uniform_looped", "static_standard", "temporal_tiling"],),
|
||||||
|
"context_frames": ("INT", {"default": 48, "min": 2, "max": 100, "step": 1, "tooltip": "Number of pixel frames in the context, NOTE: the latent space has 4 frames in 1"} ),
|
||||||
|
"context_stride": ("INT", {"default": 4, "min": 4, "max": 100, "step": 1, "tooltip": "Context stride as pixel frames, NOTE: the latent space has 4 frames in 1"} ),
|
||||||
|
"context_overlap": ("INT", {"default": 4, "min": 4, "max": 100, "step": 1, "tooltip": "Context overlap as pixel frames, NOTE: the latent space has 4 frames in 1"} ),
|
||||||
|
"freenoise": ("BOOLEAN", {"default": True, "tooltip": "Shuffle the noise"}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
RETURN_TYPES = ("COGCONTEXT", )
|
||||||
|
RETURN_NAMES = ("context_options",)
|
||||||
|
FUNCTION = "process"
|
||||||
|
CATEGORY = "CogVideoWrapper"
|
||||||
|
|
||||||
|
def process(self, context_schedule, context_frames, context_stride, context_overlap, freenoise):
|
||||||
|
context_options = {
|
||||||
|
"context_schedule":context_schedule,
|
||||||
|
"context_frames":context_frames,
|
||||||
|
"context_stride":context_stride,
|
||||||
|
"context_overlap":context_overlap,
|
||||||
|
"freenoise":freenoise
|
||||||
|
}
|
||||||
|
|
||||||
|
return (context_options,)
|
||||||
|
|
||||||
class CogVideoTransformerEdit:
|
class CogVideoTransformerEdit:
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -155,7 +181,8 @@ class CogVideoLoraSelect:
|
|||||||
cog_loras_list.append(cog_lora)
|
cog_loras_list.append(cog_lora)
|
||||||
print(cog_loras_list)
|
print(cog_loras_list)
|
||||||
return (cog_loras_list,)
|
return (cog_loras_list,)
|
||||||
|
|
||||||
|
#region TextEncode
|
||||||
class CogVideoEncodePrompt:
|
class CogVideoEncodePrompt:
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def INPUT_TYPES(s):
|
||||||
@ -257,8 +284,8 @@ class CogVideoTextEncode:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
RETURN_TYPES = ("CONDITIONING",)
|
RETURN_TYPES = ("CONDITIONING", "CLIP",)
|
||||||
RETURN_NAMES = ("conditioning",)
|
RETURN_NAMES = ("conditioning", "clip")
|
||||||
FUNCTION = "process"
|
FUNCTION = "process"
|
||||||
CATEGORY = "CogVideoWrapper"
|
CATEGORY = "CogVideoWrapper"
|
||||||
|
|
||||||
@ -279,7 +306,7 @@ class CogVideoTextEncode:
|
|||||||
if force_offload:
|
if force_offload:
|
||||||
clip.cond_stage_model.to(offload_device)
|
clip.cond_stage_model.to(offload_device)
|
||||||
|
|
||||||
return (embeds, )
|
return (embeds, clip, )
|
||||||
|
|
||||||
class CogVideoTextEncodeCombine:
|
class CogVideoTextEncodeCombine:
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -311,7 +338,8 @@ class CogVideoTextEncodeCombine:
|
|||||||
raise ValueError("Invalid combination mode")
|
raise ValueError("Invalid combination mode")
|
||||||
|
|
||||||
return (embeds, )
|
return (embeds, )
|
||||||
|
|
||||||
|
#region ImageEncode
|
||||||
class CogVideoImageEncode:
|
class CogVideoImageEncode:
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def INPUT_TYPES(s):
|
||||||
@ -473,7 +501,8 @@ class CogVideoImageInterpolationEncode:
|
|||||||
vae.to(offload_device)
|
vae.to(offload_device)
|
||||||
|
|
||||||
return ({"samples": final_latents}, )
|
return ({"samples": final_latents}, )
|
||||||
|
|
||||||
|
#region Tora
|
||||||
from .tora.traj_utils import process_traj, scale_traj_list_to_256
|
from .tora.traj_utils import process_traj, scale_traj_list_to_256
|
||||||
from torchvision.utils import flow_to_image
|
from torchvision.utils import flow_to_image
|
||||||
|
|
||||||
@ -630,8 +659,94 @@ class ToraEncodeOpticalFlow:
|
|||||||
}
|
}
|
||||||
|
|
||||||
return (tora, )
|
return (tora, )
|
||||||
|
|
||||||
|
def add_noise_to_reference_video(image, ratio=None):
|
||||||
|
if ratio is None:
|
||||||
|
sigma = torch.normal(mean=-3.0, std=0.5, size=(image.shape[0],)).to(image.device)
|
||||||
|
sigma = torch.exp(sigma).to(image.dtype)
|
||||||
|
else:
|
||||||
|
sigma = torch.ones((image.shape[0],)).to(image.device, image.dtype) * ratio
|
||||||
|
|
||||||
|
image_noise = torch.randn_like(image) * sigma[:, None, None, None, None]
|
||||||
|
image_noise = torch.where(image==-1, torch.zeros_like(image), image_noise)
|
||||||
|
image = image + image_noise
|
||||||
|
return image
|
||||||
|
|
||||||
|
class CogVideoControlImageEncode:
|
||||||
|
@classmethod
|
||||||
|
def INPUT_TYPES(s):
|
||||||
|
return {"required": {
|
||||||
|
"pipeline": ("COGVIDEOPIPE",),
|
||||||
|
"control_video": ("IMAGE", ),
|
||||||
|
"base_resolution": ("INT", {"min": 64, "max": 1280, "step": 64, "default": 512, "tooltip": "Base resolution, closest training data bucket resolution is chosen based on the selection."}),
|
||||||
|
"enable_tiling": ("BOOLEAN", {"default": False, "tooltip": "Enable tiling for the VAE to reduce memory usage"}),
|
||||||
|
"noise_aug_strength": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
RETURN_TYPES = ("COGCONTROL_LATENTS", "INT", "INT",)
|
||||||
|
RETURN_NAMES = ("control_latents", "width", "height")
|
||||||
|
FUNCTION = "encode"
|
||||||
|
CATEGORY = "CogVideoWrapper"
|
||||||
|
|
||||||
|
def encode(self, pipeline, control_video, base_resolution, enable_tiling, noise_aug_strength=0.0563):
|
||||||
|
device = mm.get_torch_device()
|
||||||
|
offload_device = mm.unet_offload_device()
|
||||||
|
|
||||||
|
B, H, W, C = control_video.shape
|
||||||
|
|
||||||
|
vae = pipeline["pipe"].vae
|
||||||
|
vae.enable_slicing()
|
||||||
|
|
||||||
|
if enable_tiling:
|
||||||
|
from .mz_enable_vae_encode_tiling import enable_vae_encode_tiling
|
||||||
|
enable_vae_encode_tiling(vae)
|
||||||
|
|
||||||
|
if not pipeline["cpu_offloading"]:
|
||||||
|
vae.to(device)
|
||||||
|
|
||||||
|
# Count most suitable height and width
|
||||||
|
aspect_ratio_sample_size = {key : [x / 512 * base_resolution for x in ASPECT_RATIO_512[key]] for key in ASPECT_RATIO_512.keys()}
|
||||||
|
|
||||||
|
control_video = np.array(control_video.cpu().numpy() * 255, np.uint8)
|
||||||
|
original_width, original_height = Image.fromarray(control_video[0]).size
|
||||||
|
|
||||||
|
closest_size, closest_ratio = get_closest_ratio(original_height, original_width, ratios=aspect_ratio_sample_size)
|
||||||
|
height, width = [int(x / 16) * 16 for x in closest_size]
|
||||||
|
log.info(f"Closest bucket size: {width}x{height}")
|
||||||
|
|
||||||
|
video_length = int((B - 1) // vae.config.temporal_compression_ratio * vae.config.temporal_compression_ratio) + 1 if B != 1 else 1
|
||||||
|
input_video, input_video_mask, clip_image = get_video_to_video_latent(control_video, video_length=video_length, sample_size=(height, width))
|
||||||
|
|
||||||
|
control_video = pipeline["pipe"].image_processor.preprocess(rearrange(input_video, "b c f h w -> (b f) c h w"), height=height, width=width)
|
||||||
|
control_video = control_video.to(dtype=torch.float32)
|
||||||
|
control_video = rearrange(control_video, "(b f) c h w -> b c f h w", f=video_length)
|
||||||
|
|
||||||
|
masked_image = control_video.to(device=device, dtype=vae.dtype)
|
||||||
|
if noise_aug_strength > 0:
|
||||||
|
masked_image = add_noise_to_reference_video(masked_image, ratio=noise_aug_strength)
|
||||||
|
bs = 1
|
||||||
|
new_mask_pixel_values = []
|
||||||
|
for i in range(0, masked_image.shape[0], bs):
|
||||||
|
mask_pixel_values_bs = masked_image[i : i + bs]
|
||||||
|
mask_pixel_values_bs = vae.encode(mask_pixel_values_bs)[0]
|
||||||
|
mask_pixel_values_bs = mask_pixel_values_bs.mode()
|
||||||
|
new_mask_pixel_values.append(mask_pixel_values_bs)
|
||||||
|
masked_image_latents = torch.cat(new_mask_pixel_values, dim = 0)
|
||||||
|
masked_image_latents = masked_image_latents * vae.config.scaling_factor
|
||||||
|
|
||||||
|
vae.to(offload_device)
|
||||||
|
|
||||||
|
control_latents = {
|
||||||
|
"latents": masked_image_latents,
|
||||||
|
"num_frames" : B,
|
||||||
|
"height" : height,
|
||||||
|
"width" : width,
|
||||||
|
}
|
||||||
|
|
||||||
|
return (control_latents, width, height)
|
||||||
|
|
||||||
|
#region FasterCache
|
||||||
class CogVideoXFasterCache:
|
class CogVideoXFasterCache:
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def INPUT_TYPES(s):
|
||||||
@ -659,7 +774,8 @@ class CogVideoXFasterCache:
|
|||||||
"cache_device" : device if cache_device == "main_device" else offload_device
|
"cache_device" : device if cache_device == "main_device" else offload_device
|
||||||
}
|
}
|
||||||
return (fastercache,)
|
return (fastercache,)
|
||||||
|
|
||||||
|
#region Sampler
|
||||||
class CogVideoSampler:
|
class CogVideoSampler:
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def INPUT_TYPES(s):
|
||||||
@ -782,7 +898,43 @@ class CogVideoSampler:
|
|||||||
mm.soft_empty_cache()
|
mm.soft_empty_cache()
|
||||||
|
|
||||||
return (pipeline, {"samples": latents})
|
return (pipeline, {"samples": latents})
|
||||||
|
|
||||||
|
class CogVideoControlNet:
|
||||||
|
@classmethod
|
||||||
|
def INPUT_TYPES(s):
|
||||||
|
return {"required": {
|
||||||
|
"controlnet": ("COGVIDECONTROLNETMODEL",),
|
||||||
|
"images": ("IMAGE", ),
|
||||||
|
"control_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
|
||||||
|
"control_start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}),
|
||||||
|
"control_end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
RETURN_TYPES = ("COGVIDECONTROLNET",)
|
||||||
|
RETURN_NAMES = ("cogvideo_controlnet",)
|
||||||
|
FUNCTION = "encode"
|
||||||
|
CATEGORY = "CogVideoWrapper"
|
||||||
|
|
||||||
|
def encode(self, controlnet, images, control_strength, control_start_percent, control_end_percent):
|
||||||
|
device = mm.get_torch_device()
|
||||||
|
offload_device = mm.unet_offload_device()
|
||||||
|
|
||||||
|
B, H, W, C = images.shape
|
||||||
|
|
||||||
|
control_frames = images.permute(0, 3, 1, 2).unsqueeze(0) * 2 - 1
|
||||||
|
|
||||||
|
controlnet = {
|
||||||
|
"control_model": controlnet,
|
||||||
|
"control_frames": control_frames,
|
||||||
|
"control_weights": control_strength,
|
||||||
|
"control_start": control_start_percent,
|
||||||
|
"control_end": control_end_percent,
|
||||||
|
}
|
||||||
|
|
||||||
|
return (controlnet,)
|
||||||
|
|
||||||
|
#region VideoDecode
|
||||||
class CogVideoDecode:
|
class CogVideoDecode:
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def INPUT_TYPES(s):
|
||||||
@ -878,7 +1030,8 @@ class CogVideoXFunResizeToClosestBucket:
|
|||||||
resized_images = resized_images.movedim(1,-1)
|
resized_images = resized_images.movedim(1,-1)
|
||||||
|
|
||||||
return (resized_images, width, height)
|
return (resized_images, width, height)
|
||||||
|
|
||||||
|
#region FunSamplers
|
||||||
class CogVideoXFunSampler:
|
class CogVideoXFunSampler:
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def INPUT_TYPES(s):
|
||||||
@ -888,7 +1041,8 @@ class CogVideoXFunSampler:
|
|||||||
"positive": ("CONDITIONING", ),
|
"positive": ("CONDITIONING", ),
|
||||||
"negative": ("CONDITIONING", ),
|
"negative": ("CONDITIONING", ),
|
||||||
"video_length": ("INT", {"default": 49, "min": 5, "max": 2048, "step": 4}),
|
"video_length": ("INT", {"default": 49, "min": 5, "max": 2048, "step": 4}),
|
||||||
"base_resolution": ("INT", {"min": 64, "max": 1280, "step": 64, "default": 512, "tooltip": "Base resolution, closest training data bucket resolution is chosen based on the selection."}),
|
"width": ("INT", {"default": 720, "min": 128, "max": 2048, "step": 8}),
|
||||||
|
"height": ("INT", {"default": 480, "min": 128, "max": 2048, "step": 8}),
|
||||||
"seed": ("INT", {"default": 43, "min": 0, "max": 0xffffffffffffffff}),
|
"seed": ("INT", {"default": 43, "min": 0, "max": 0xffffffffffffffff}),
|
||||||
"steps": ("INT", {"default": 50, "min": 1, "max": 200, "step": 1}),
|
"steps": ("INT", {"default": 50, "min": 1, "max": 200, "step": 1}),
|
||||||
"cfg": ("FLOAT", {"default": 6.0, "min": 1.0, "max": 20.0, "step": 0.01}),
|
"cfg": ("FLOAT", {"default": 6.0, "min": 1.0, "max": 20.0, "step": 0.01}),
|
||||||
@ -897,7 +1051,6 @@ class CogVideoXFunSampler:
|
|||||||
"optional":{
|
"optional":{
|
||||||
"start_img": ("IMAGE",),
|
"start_img": ("IMAGE",),
|
||||||
"end_img": ("IMAGE",),
|
"end_img": ("IMAGE",),
|
||||||
"opt_empty_latent": ("LATENT",),
|
|
||||||
"noise_aug_strength": ("FLOAT", {"default": 0.0563, "min": 0.0, "max": 1.0, "step": 0.001}),
|
"noise_aug_strength": ("FLOAT", {"default": 0.0563, "min": 0.0, "max": 1.0, "step": 0.001}),
|
||||||
"context_options": ("COGCONTEXT", ),
|
"context_options": ("COGCONTEXT", ),
|
||||||
"tora_trajectory": ("TORAFEATURES", ),
|
"tora_trajectory": ("TORAFEATURES", ),
|
||||||
@ -912,8 +1065,8 @@ class CogVideoXFunSampler:
|
|||||||
FUNCTION = "process"
|
FUNCTION = "process"
|
||||||
CATEGORY = "CogVideoWrapper"
|
CATEGORY = "CogVideoWrapper"
|
||||||
|
|
||||||
def process(self, pipeline, positive, negative, video_length, base_resolution, seed, steps, cfg, scheduler,
|
def process(self, pipeline, positive, negative, video_length, width, height, seed, steps, cfg, scheduler,
|
||||||
start_img=None, end_img=None, opt_empty_latent=None, noise_aug_strength=0.0563, context_options=None, fastercache=None,
|
start_img=None, end_img=None, noise_aug_strength=0.0563, context_options=None, fastercache=None,
|
||||||
tora_trajectory=None, vid2vid_images=None, vid2vid_denoise=1.0):
|
tora_trajectory=None, vid2vid_images=None, vid2vid_denoise=1.0):
|
||||||
device = mm.get_torch_device()
|
device = mm.get_torch_device()
|
||||||
offload_device = mm.unet_offload_device()
|
offload_device = mm.unet_offload_device()
|
||||||
@ -929,23 +1082,13 @@ class CogVideoXFunSampler:
|
|||||||
|
|
||||||
mm.soft_empty_cache()
|
mm.soft_empty_cache()
|
||||||
|
|
||||||
aspect_ratio_sample_size = {key : [x / 512 * base_resolution for x in ASPECT_RATIO_512[key]] for key in ASPECT_RATIO_512.keys()}
|
|
||||||
#vid2vid
|
#vid2vid
|
||||||
if vid2vid_images is not None:
|
if vid2vid_images is not None:
|
||||||
validation_video = np.array(vid2vid_images.cpu().numpy() * 255, np.uint8)
|
validation_video = np.array(vid2vid_images.cpu().numpy() * 255, np.uint8)
|
||||||
original_width, original_height = Image.fromarray(validation_video[0]).size
|
|
||||||
#img2vid
|
#img2vid
|
||||||
elif start_img is not None:
|
elif start_img is not None:
|
||||||
start_img = [to_pil(_start_img) for _start_img in start_img] if start_img is not None else None
|
start_img = [to_pil(_start_img) for _start_img in start_img] if start_img is not None else None
|
||||||
end_img = [to_pil(_end_img) for _end_img in end_img] if end_img is not None else None
|
end_img = [to_pil(_end_img) for _end_img in end_img] if end_img is not None else None
|
||||||
# Count most suitable height and width
|
|
||||||
original_width, original_height = start_img[0].size if type(start_img) is list else Image.open(start_img).size
|
|
||||||
else:
|
|
||||||
original_width = opt_empty_latent["samples"][0].shape[-1] * 8
|
|
||||||
original_height = opt_empty_latent["samples"][0].shape[-2] * 8
|
|
||||||
closest_size, closest_ratio = get_closest_ratio(original_height, original_width, ratios=aspect_ratio_sample_size)
|
|
||||||
height, width = [int(x / 16) * 16 for x in closest_size]
|
|
||||||
log.info(f"Closest bucket size: {width}x{height}")
|
|
||||||
|
|
||||||
# Load Sampler
|
# Load Sampler
|
||||||
if context_options is not None and context_options["context_schedule"] == "temporal_tiling":
|
if context_options is not None and context_options["context_schedule"] == "temporal_tiling":
|
||||||
@ -1045,156 +1188,6 @@ class CogVideoXFunVid2VidSampler:
|
|||||||
DEPRECATED = True
|
DEPRECATED = True
|
||||||
def process(self):
|
def process(self):
|
||||||
return ()
|
return ()
|
||||||
|
|
||||||
def add_noise_to_reference_video(image, ratio=None):
|
|
||||||
if ratio is None:
|
|
||||||
sigma = torch.normal(mean=-3.0, std=0.5, size=(image.shape[0],)).to(image.device)
|
|
||||||
sigma = torch.exp(sigma).to(image.dtype)
|
|
||||||
else:
|
|
||||||
sigma = torch.ones((image.shape[0],)).to(image.device, image.dtype) * ratio
|
|
||||||
|
|
||||||
image_noise = torch.randn_like(image) * sigma[:, None, None, None, None]
|
|
||||||
image_noise = torch.where(image==-1, torch.zeros_like(image), image_noise)
|
|
||||||
image = image + image_noise
|
|
||||||
return image
|
|
||||||
|
|
||||||
class CogVideoControlImageEncode:
|
|
||||||
@classmethod
|
|
||||||
def INPUT_TYPES(s):
|
|
||||||
return {"required": {
|
|
||||||
"pipeline": ("COGVIDEOPIPE",),
|
|
||||||
"control_video": ("IMAGE", ),
|
|
||||||
"base_resolution": ("INT", {"min": 64, "max": 1280, "step": 64, "default": 512, "tooltip": "Base resolution, closest training data bucket resolution is chosen based on the selection."}),
|
|
||||||
"enable_tiling": ("BOOLEAN", {"default": False, "tooltip": "Enable tiling for the VAE to reduce memory usage"}),
|
|
||||||
"noise_aug_strength": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
RETURN_TYPES = ("COGCONTROL_LATENTS", "INT", "INT",)
|
|
||||||
RETURN_NAMES = ("control_latents", "width", "height")
|
|
||||||
FUNCTION = "encode"
|
|
||||||
CATEGORY = "CogVideoWrapper"
|
|
||||||
|
|
||||||
def encode(self, pipeline, control_video, base_resolution, enable_tiling, noise_aug_strength=0.0563):
|
|
||||||
device = mm.get_torch_device()
|
|
||||||
offload_device = mm.unet_offload_device()
|
|
||||||
|
|
||||||
B, H, W, C = control_video.shape
|
|
||||||
|
|
||||||
vae = pipeline["pipe"].vae
|
|
||||||
vae.enable_slicing()
|
|
||||||
|
|
||||||
if enable_tiling:
|
|
||||||
from .mz_enable_vae_encode_tiling import enable_vae_encode_tiling
|
|
||||||
enable_vae_encode_tiling(vae)
|
|
||||||
|
|
||||||
if not pipeline["cpu_offloading"]:
|
|
||||||
vae.to(device)
|
|
||||||
|
|
||||||
# Count most suitable height and width
|
|
||||||
aspect_ratio_sample_size = {key : [x / 512 * base_resolution for x in ASPECT_RATIO_512[key]] for key in ASPECT_RATIO_512.keys()}
|
|
||||||
|
|
||||||
control_video = np.array(control_video.cpu().numpy() * 255, np.uint8)
|
|
||||||
original_width, original_height = Image.fromarray(control_video[0]).size
|
|
||||||
|
|
||||||
closest_size, closest_ratio = get_closest_ratio(original_height, original_width, ratios=aspect_ratio_sample_size)
|
|
||||||
height, width = [int(x / 16) * 16 for x in closest_size]
|
|
||||||
log.info(f"Closest bucket size: {width}x{height}")
|
|
||||||
|
|
||||||
video_length = int((B - 1) // vae.config.temporal_compression_ratio * vae.config.temporal_compression_ratio) + 1 if B != 1 else 1
|
|
||||||
input_video, input_video_mask, clip_image = get_video_to_video_latent(control_video, video_length=video_length, sample_size=(height, width))
|
|
||||||
|
|
||||||
control_video = pipeline["pipe"].image_processor.preprocess(rearrange(input_video, "b c f h w -> (b f) c h w"), height=height, width=width)
|
|
||||||
control_video = control_video.to(dtype=torch.float32)
|
|
||||||
control_video = rearrange(control_video, "(b f) c h w -> b c f h w", f=video_length)
|
|
||||||
|
|
||||||
masked_image = control_video.to(device=device, dtype=vae.dtype)
|
|
||||||
if noise_aug_strength > 0:
|
|
||||||
masked_image = add_noise_to_reference_video(masked_image, ratio=noise_aug_strength)
|
|
||||||
bs = 1
|
|
||||||
new_mask_pixel_values = []
|
|
||||||
for i in range(0, masked_image.shape[0], bs):
|
|
||||||
mask_pixel_values_bs = masked_image[i : i + bs]
|
|
||||||
mask_pixel_values_bs = vae.encode(mask_pixel_values_bs)[0]
|
|
||||||
mask_pixel_values_bs = mask_pixel_values_bs.mode()
|
|
||||||
new_mask_pixel_values.append(mask_pixel_values_bs)
|
|
||||||
masked_image_latents = torch.cat(new_mask_pixel_values, dim = 0)
|
|
||||||
masked_image_latents = masked_image_latents * vae.config.scaling_factor
|
|
||||||
|
|
||||||
vae.to(offload_device)
|
|
||||||
|
|
||||||
control_latents = {
|
|
||||||
"latents": masked_image_latents,
|
|
||||||
"num_frames" : B,
|
|
||||||
"height" : height,
|
|
||||||
"width" : width,
|
|
||||||
}
|
|
||||||
|
|
||||||
return (control_latents, width, height)
|
|
||||||
|
|
||||||
class CogVideoControlNet:
|
|
||||||
@classmethod
|
|
||||||
def INPUT_TYPES(s):
|
|
||||||
return {"required": {
|
|
||||||
"controlnet": ("COGVIDECONTROLNETMODEL",),
|
|
||||||
"images": ("IMAGE", ),
|
|
||||||
"control_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
|
|
||||||
"control_start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}),
|
|
||||||
"control_end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
RETURN_TYPES = ("COGVIDECONTROLNET",)
|
|
||||||
RETURN_NAMES = ("cogvideo_controlnet",)
|
|
||||||
FUNCTION = "encode"
|
|
||||||
CATEGORY = "CogVideoWrapper"
|
|
||||||
|
|
||||||
def encode(self, controlnet, images, control_strength, control_start_percent, control_end_percent):
|
|
||||||
device = mm.get_torch_device()
|
|
||||||
offload_device = mm.unet_offload_device()
|
|
||||||
|
|
||||||
B, H, W, C = images.shape
|
|
||||||
|
|
||||||
control_frames = images.permute(0, 3, 1, 2).unsqueeze(0) * 2 - 1
|
|
||||||
|
|
||||||
controlnet = {
|
|
||||||
"control_model": controlnet,
|
|
||||||
"control_frames": control_frames,
|
|
||||||
"control_weights": control_strength,
|
|
||||||
"control_start": control_start_percent,
|
|
||||||
"control_end": control_end_percent,
|
|
||||||
}
|
|
||||||
|
|
||||||
return (controlnet,)
|
|
||||||
|
|
||||||
|
|
||||||
class CogVideoContextOptions:
|
|
||||||
@classmethod
|
|
||||||
def INPUT_TYPES(s):
|
|
||||||
return {"required": {
|
|
||||||
"context_schedule": (["uniform_standard", "uniform_looped", "static_standard", "temporal_tiling"],),
|
|
||||||
"context_frames": ("INT", {"default": 48, "min": 2, "max": 100, "step": 1, "tooltip": "Number of pixel frames in the context, NOTE: the latent space has 4 frames in 1"} ),
|
|
||||||
"context_stride": ("INT", {"default": 4, "min": 4, "max": 100, "step": 1, "tooltip": "Context stride as pixel frames, NOTE: the latent space has 4 frames in 1"} ),
|
|
||||||
"context_overlap": ("INT", {"default": 4, "min": 4, "max": 100, "step": 1, "tooltip": "Context overlap as pixel frames, NOTE: the latent space has 4 frames in 1"} ),
|
|
||||||
"freenoise": ("BOOLEAN", {"default": True, "tooltip": "Shuffle the noise"}),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
RETURN_TYPES = ("COGCONTEXT", )
|
|
||||||
RETURN_NAMES = ("context_options",)
|
|
||||||
FUNCTION = "process"
|
|
||||||
CATEGORY = "CogVideoWrapper"
|
|
||||||
|
|
||||||
def process(self, context_schedule, context_frames, context_stride, context_overlap, freenoise):
|
|
||||||
context_options = {
|
|
||||||
"context_schedule":context_schedule,
|
|
||||||
"context_frames":context_frames,
|
|
||||||
"context_stride":context_stride,
|
|
||||||
"context_overlap":context_overlap,
|
|
||||||
"freenoise":freenoise
|
|
||||||
}
|
|
||||||
|
|
||||||
return (context_options,)
|
|
||||||
|
|
||||||
class CogVideoXFunControlSampler:
|
class CogVideoXFunControlSampler:
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user