mirror of
https://git.datalinker.icu/kijai/ComfyUI-CogVideoXWrapper.git
synced 2025-12-09 04:44:22 +08:00
use existing T5 models
This commit is contained in:
parent
d56e14ec1e
commit
b787b9a8fa
@ -1,145 +1,37 @@
|
||||
{
|
||||
"last_node_id": 12,
|
||||
"last_link_id": 23,
|
||||
"last_node_id": 31,
|
||||
"last_link_id": 57,
|
||||
"nodes": [
|
||||
{
|
||||
"id": 11,
|
||||
"type": "CogVideoDecode",
|
||||
"pos": [
|
||||
1301,
|
||||
352
|
||||
],
|
||||
"size": {
|
||||
"0": 210,
|
||||
"1": 46
|
||||
},
|
||||
"flags": {},
|
||||
"order": 3,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "pipeline",
|
||||
"type": "COGVIDEOPIPE",
|
||||
"link": 21
|
||||
},
|
||||
{
|
||||
"name": "samples",
|
||||
"type": "LATENT",
|
||||
"link": 22
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "images",
|
||||
"type": "IMAGE",
|
||||
"links": [
|
||||
23
|
||||
],
|
||||
"shape": 3,
|
||||
"slot_index": 0
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"Node name for S&R": "CogVideoDecode"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "CogVideoEncodePrompt",
|
||||
"pos": [
|
||||
459,
|
||||
485
|
||||
],
|
||||
"size": [
|
||||
408.03107827615304,
|
||||
315.59645204258936
|
||||
],
|
||||
"flags": {},
|
||||
"order": 1,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "pipeline",
|
||||
"type": "COGVIDEOPIPE",
|
||||
"link": 1
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "embeds",
|
||||
"type": "COGEMBEDS",
|
||||
"links": [
|
||||
16
|
||||
],
|
||||
"shape": 3,
|
||||
"slot_index": 0
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"Node name for S&R": "CogVideoEncodePrompt"
|
||||
},
|
||||
"widgets_values": [
|
||||
"A detailed wooden toy ship with intricately carved masts and sails is seen gliding smoothly over a plush, blue carpet that mimics the waves of the sea. The ship's hull is painted a rich brown, with tiny windows. The carpet, soft and textured, provides a perfect backdrop, resembling an oceanic expanse. Surrounding the ship are various other toys and children's items, hinting at a playful environment. The scene captures the innocence and imagination of childhood, with the toy ship's journey symbolizing endless adventures in a whimsical, indoor setting.",
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"type": "DownloadAndLoadCogVideoModel",
|
||||
"pos": [
|
||||
460,
|
||||
354
|
||||
],
|
||||
"size": {
|
||||
"0": 315,
|
||||
"1": 58
|
||||
},
|
||||
"flags": {},
|
||||
"order": 0,
|
||||
"mode": 0,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "cogvideo_pipe",
|
||||
"type": "COGVIDEOPIPE",
|
||||
"links": [
|
||||
1,
|
||||
15
|
||||
],
|
||||
"shape": 3,
|
||||
"slot_index": 0
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"Node name for S&R": "DownloadAndLoadCogVideoModel"
|
||||
},
|
||||
"widgets_values": [
|
||||
"fp16"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"id": 22,
|
||||
"type": "CogVideoSampler",
|
||||
"pos": [
|
||||
920,
|
||||
353
|
||||
1041,
|
||||
342
|
||||
],
|
||||
"size": {
|
||||
"0": 315,
|
||||
"1": 246
|
||||
"1": 266
|
||||
},
|
||||
"flags": {},
|
||||
"order": 2,
|
||||
"order": 4,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "pipeline",
|
||||
"type": "COGVIDEOPIPE",
|
||||
"link": 15
|
||||
"link": 36
|
||||
},
|
||||
{
|
||||
"name": "embeds",
|
||||
"type": "COGEMBEDS",
|
||||
"link": 16
|
||||
"name": "positive",
|
||||
"type": "CONDITIONING",
|
||||
"link": 55,
|
||||
"slot_index": 1
|
||||
},
|
||||
{
|
||||
"name": "negative",
|
||||
"type": "CONDITIONING",
|
||||
"link": 57
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
@ -147,7 +39,7 @@
|
||||
"name": "cogvideo_pipe",
|
||||
"type": "COGVIDEOPIPE",
|
||||
"links": [
|
||||
21
|
||||
37
|
||||
],
|
||||
"shape": 3
|
||||
},
|
||||
@ -155,7 +47,7 @@
|
||||
"name": "samples",
|
||||
"type": "LATENT",
|
||||
"links": [
|
||||
22
|
||||
38
|
||||
],
|
||||
"shape": 3
|
||||
}
|
||||
@ -166,33 +58,75 @@
|
||||
"widgets_values": [
|
||||
480,
|
||||
720,
|
||||
48,
|
||||
16,
|
||||
8,
|
||||
30,
|
||||
25,
|
||||
6,
|
||||
867121661458558,
|
||||
806286757407561,
|
||||
"fixed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
"id": 11,
|
||||
"type": "CogVideoDecode",
|
||||
"pos": [
|
||||
1142,
|
||||
658
|
||||
],
|
||||
"size": {
|
||||
"0": 210,
|
||||
"1": 46
|
||||
},
|
||||
"flags": {},
|
||||
"order": 5,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "pipeline",
|
||||
"type": "COGVIDEOPIPE",
|
||||
"link": 37
|
||||
},
|
||||
{
|
||||
"name": "samples",
|
||||
"type": "LATENT",
|
||||
"link": 38
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "images",
|
||||
"type": "IMAGE",
|
||||
"links": [
|
||||
51
|
||||
],
|
||||
"shape": 3,
|
||||
"slot_index": 0
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"Node name for S&R": "CogVideoDecode"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 28,
|
||||
"type": "VHS_VideoCombine",
|
||||
"pos": [
|
||||
1563,
|
||||
353
|
||||
1432,
|
||||
150
|
||||
],
|
||||
"size": [
|
||||
315,
|
||||
520.6666666666666
|
||||
667.752197265625,
|
||||
755.8347981770833
|
||||
],
|
||||
"flags": {},
|
||||
"order": 4,
|
||||
"order": 6,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "images",
|
||||
"type": "IMAGE",
|
||||
"link": 23
|
||||
"link": 51,
|
||||
"slot_index": 0
|
||||
},
|
||||
{
|
||||
"name": "audio",
|
||||
@ -235,7 +169,7 @@
|
||||
"hidden": false,
|
||||
"paused": false,
|
||||
"params": {
|
||||
"filename": "AnimateDiff_00003.mp4",
|
||||
"filename": "AnimateDiff_00001.mp4",
|
||||
"subfolder": "",
|
||||
"type": "temp",
|
||||
"format": "video/h264-mp4",
|
||||
@ -243,66 +177,226 @@
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"type": "CogVideoTextEncode",
|
||||
"pos": [
|
||||
500,
|
||||
308
|
||||
],
|
||||
"size": [
|
||||
474.84501511852204,
|
||||
164.74235966960538
|
||||
],
|
||||
"flags": {},
|
||||
"order": 2,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "clip",
|
||||
"type": "CLIP",
|
||||
"link": 54
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "conditioning",
|
||||
"type": "CONDITIONING",
|
||||
"links": [
|
||||
55
|
||||
],
|
||||
"shape": 3,
|
||||
"slot_index": 0
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"Node name for S&R": "CogVideoTextEncode"
|
||||
},
|
||||
"widgets_values": [
|
||||
"A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature\nacoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters\nthrough the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The\nbackground includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical\nperformance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 20,
|
||||
"type": "CLIPLoader",
|
||||
"pos": [
|
||||
-59,
|
||||
397
|
||||
],
|
||||
"size": {
|
||||
"0": 451.30548095703125,
|
||||
"1": 82
|
||||
},
|
||||
"flags": {},
|
||||
"order": 0,
|
||||
"mode": 0,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "CLIP",
|
||||
"type": "CLIP",
|
||||
"links": [
|
||||
54,
|
||||
56
|
||||
],
|
||||
"shape": 3,
|
||||
"slot_index": 0
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"Node name for S&R": "CLIPLoader"
|
||||
},
|
||||
"widgets_values": [
|
||||
"t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
|
||||
"sd3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 31,
|
||||
"type": "CogVideoTextEncode",
|
||||
"pos": [
|
||||
503,
|
||||
521
|
||||
],
|
||||
"size": [
|
||||
463.01251866466464,
|
||||
98.10446321574796
|
||||
],
|
||||
"flags": {},
|
||||
"order": 3,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "clip",
|
||||
"type": "CLIP",
|
||||
"link": 56
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "conditioning",
|
||||
"type": "CONDITIONING",
|
||||
"links": [
|
||||
57
|
||||
],
|
||||
"shape": 3,
|
||||
"slot_index": 0
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"Node name for S&R": "CogVideoTextEncode"
|
||||
},
|
||||
"widgets_values": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"type": "DownloadAndLoadCogVideoModel",
|
||||
"pos": [
|
||||
649,
|
||||
182
|
||||
],
|
||||
"size": {
|
||||
"0": 315,
|
||||
"1": 58
|
||||
},
|
||||
"flags": {},
|
||||
"order": 1,
|
||||
"mode": 0,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "cogvideo_pipe",
|
||||
"type": "COGVIDEOPIPE",
|
||||
"links": [
|
||||
36
|
||||
],
|
||||
"shape": 3,
|
||||
"slot_index": 0
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"Node name for S&R": "DownloadAndLoadCogVideoModel"
|
||||
},
|
||||
"widgets_values": [
|
||||
"fp16"
|
||||
]
|
||||
}
|
||||
],
|
||||
"links": [
|
||||
[
|
||||
1,
|
||||
36,
|
||||
1,
|
||||
0,
|
||||
2,
|
||||
22,
|
||||
0,
|
||||
"COGVIDEOPIPE"
|
||||
],
|
||||
[
|
||||
15,
|
||||
1,
|
||||
0,
|
||||
10,
|
||||
0,
|
||||
"COGVIDEOPIPE"
|
||||
],
|
||||
[
|
||||
16,
|
||||
2,
|
||||
0,
|
||||
10,
|
||||
1,
|
||||
"COGEMBEDS"
|
||||
],
|
||||
[
|
||||
21,
|
||||
10,
|
||||
37,
|
||||
22,
|
||||
0,
|
||||
11,
|
||||
0,
|
||||
"COGVIDEOPIPE"
|
||||
],
|
||||
[
|
||||
38,
|
||||
22,
|
||||
10,
|
||||
1,
|
||||
11,
|
||||
1,
|
||||
"LATENT"
|
||||
],
|
||||
[
|
||||
23,
|
||||
51,
|
||||
11,
|
||||
0,
|
||||
12,
|
||||
28,
|
||||
0,
|
||||
"IMAGE"
|
||||
],
|
||||
[
|
||||
54,
|
||||
20,
|
||||
0,
|
||||
30,
|
||||
0,
|
||||
"CLIP"
|
||||
],
|
||||
[
|
||||
55,
|
||||
30,
|
||||
0,
|
||||
22,
|
||||
1,
|
||||
"CONDITIONING"
|
||||
],
|
||||
[
|
||||
56,
|
||||
20,
|
||||
0,
|
||||
31,
|
||||
0,
|
||||
"CLIP"
|
||||
],
|
||||
[
|
||||
57,
|
||||
31,
|
||||
0,
|
||||
22,
|
||||
2,
|
||||
"CONDITIONING"
|
||||
]
|
||||
],
|
||||
"groups": [],
|
||||
"config": {},
|
||||
"extra": {
|
||||
"ds": {
|
||||
"scale": 1,
|
||||
"scale": 0.6830134553650706,
|
||||
"offset": [
|
||||
-281.3644522995906,
|
||||
-67.92982606602688
|
||||
359.4381777891929,
|
||||
334.95283678425216
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
52
nodes.py
52
nodes.py
@ -48,12 +48,13 @@ class DownloadAndLoadCogVideoModel:
|
||||
|
||||
snapshot_download(
|
||||
repo_id="THUDM/CogVideoX-2b",
|
||||
#ignore_patterns=["*sd-image-variations-encoder-fp16.safetensors", "fye_motion_module-fp16.safetensors"],
|
||||
ignore_patterns=["*text_encoder*"],
|
||||
local_dir=base_path,
|
||||
local_dir_use_symlinks=False,
|
||||
)
|
||||
|
||||
pipe = CogVideoXPipeline.from_pretrained(base_path, torch_dtype=dtype).to(offload_device)
|
||||
|
||||
|
||||
pipeline = {
|
||||
"pipe": pipe,
|
||||
@ -72,8 +73,8 @@ class CogVideoEncodePrompt:
|
||||
}
|
||||
}
|
||||
|
||||
RETURN_TYPES = ("COGEMBEDS",)
|
||||
RETURN_NAMES = ("embeds",)
|
||||
RETURN_TYPES = ("CONDITIONING", "CONDITIONING")
|
||||
RETURN_NAMES = ("positive", "negative")
|
||||
FUNCTION = "process"
|
||||
CATEGORY = "CogVideoWrapper"
|
||||
|
||||
@ -86,7 +87,7 @@ class CogVideoEncodePrompt:
|
||||
pipe.text_encoder.to(device)
|
||||
pipe.transformer.to(offload_device)
|
||||
|
||||
pos_embeds, neg_embeds = pipe.encode_prompt(
|
||||
positive, negative = pipe.encode_prompt(
|
||||
prompt=prompt,
|
||||
negative_prompt=negative_prompt,
|
||||
do_classifier_free_guidance=True,
|
||||
@ -96,11 +97,30 @@ class CogVideoEncodePrompt:
|
||||
dtype=dtype,
|
||||
)
|
||||
pipe.text_encoder.to(offload_device)
|
||||
embeds = {
|
||||
"positive": pos_embeds,
|
||||
"negative": neg_embeds,
|
||||
|
||||
return (positive, negative)
|
||||
|
||||
class CogVideoTextEncode:
|
||||
@classmethod
|
||||
def INPUT_TYPES(s):
|
||||
return {"required": {
|
||||
"clip": ("CLIP",),
|
||||
"prompt": ("STRING", {"default": "", "multiline": True} ),
|
||||
}
|
||||
}
|
||||
|
||||
RETURN_TYPES = ("CONDITIONING",)
|
||||
RETURN_NAMES = ("conditioning",)
|
||||
FUNCTION = "process"
|
||||
CATEGORY = "CogVideoWrapper"
|
||||
|
||||
def process(self, clip, prompt):
|
||||
clip.tokenizer.t5xxl.pad_to_max_length = True
|
||||
clip.tokenizer.t5xxl.max_length = 226
|
||||
tokens = clip.tokenize(prompt, return_word_ids=True)
|
||||
|
||||
embeds = clip.encode_from_tokens(tokens, return_pooled=False, return_dict=False)
|
||||
|
||||
return (embeds, )
|
||||
|
||||
class CogVideoSampler:
|
||||
@ -108,7 +128,8 @@ class CogVideoSampler:
|
||||
def INPUT_TYPES(s):
|
||||
return {"required": {
|
||||
"pipeline": ("COGVIDEOPIPE",),
|
||||
"embeds": ("COGEMBEDS", ),
|
||||
"positive": ("CONDITIONING", ),
|
||||
"negative": ("CONDITIONING", ),
|
||||
"height": ("INT", {"default": 480, "min": 128, "max": 2048, "step": 8}),
|
||||
"width": ("INT", {"default": 720, "min": 128, "max": 2048, "step": 8}),
|
||||
"num_frames": ("INT", {"default": 48, "min": 1, "max": 100, "step": 1}),
|
||||
@ -124,11 +145,12 @@ class CogVideoSampler:
|
||||
FUNCTION = "process"
|
||||
CATEGORY = "CogVideoWrapper"
|
||||
|
||||
def process(self, pipeline, embeds, fps, steps, cfg, seed, height, width, num_frames):
|
||||
def process(self, pipeline, positive, negative, fps, steps, cfg, seed, height, width, num_frames):
|
||||
mm.soft_empty_cache()
|
||||
device = mm.get_torch_device()
|
||||
offload_device = mm.unet_offload_device()
|
||||
pipe = pipeline["pipe"]
|
||||
dtype = pipeline["dtype"]
|
||||
|
||||
pipe.transformer.to(device)
|
||||
generator = torch.Generator(device=device).manual_seed(seed)
|
||||
@ -140,8 +162,8 @@ class CogVideoSampler:
|
||||
num_frames = num_frames,
|
||||
fps = fps,
|
||||
guidance_scale=cfg,
|
||||
prompt_embeds=embeds["positive"],
|
||||
negative_prompt_embeds=embeds["negative"],
|
||||
prompt_embeds=positive.to(dtype).to(device),
|
||||
negative_prompt_embeds=negative.to(dtype).to(device),
|
||||
#negative_prompt_embeds=torch.zeros_like(embeds),
|
||||
generator=generator,
|
||||
output_type="latents",
|
||||
@ -206,12 +228,12 @@ class CogVideoDecode:
|
||||
NODE_CLASS_MAPPINGS = {
|
||||
"DownloadAndLoadCogVideoModel": DownloadAndLoadCogVideoModel,
|
||||
"CogVideoSampler": CogVideoSampler,
|
||||
"CogVideoEncodePrompt": CogVideoEncodePrompt,
|
||||
"CogVideoDecode": CogVideoDecode
|
||||
"CogVideoDecode": CogVideoDecode,
|
||||
"CogVideoTextEncode": CogVideoTextEncode
|
||||
}
|
||||
NODE_DISPLAY_NAME_MAPPINGS = {
|
||||
"DownloadAndLoadCogVideoModel": "DownloadAndLoadCogVideoModel",
|
||||
"DownloadAndLoadCogVideoModel": "(Down)load CogVideo Model",
|
||||
"CogVideoSampler": "CogVideo Sampler",
|
||||
"CogVideoEncodePrompt": "CogVideo EncodePrompt",
|
||||
"CogVideoDecode": "CogVideo Decode",
|
||||
"CogVideoTextEncode": "CogVideo TextEncode"
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user