Merge remote-tracking branch 'kijai/main'

This commit is contained in:
Phr00t 2024-09-21 12:04:20 -04:00
commit 69dbbd3487
4 changed files with 575 additions and 79 deletions

View File

@ -209,7 +209,7 @@ class CogVideoX_Fun_Pipeline_Inpaint(DiffusionPipeline):
""" """
_optional_components = [] _optional_components = []
model_cpu_offload_seq = "text_encoder->vae->transformer->vae" model_cpu_offload_seq = ">vae->transformer->vae"
_callback_tensor_inputs = [ _callback_tensor_inputs = [
"latents", "latents",
@ -631,7 +631,7 @@ class CogVideoX_Fun_Pipeline_Inpaint(DiffusionPipeline):
device = self._execution_device device = self._execution_device
self.vae.to(device) #self.vae.to(device)
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`

View File

@ -1,20 +1,10 @@
import os import os
import gc import gc
import imageio
import numpy as np import numpy as np
import torch import torch
import torchvision
import cv2
from einops import rearrange
from PIL import Image from PIL import Image
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
import os
import cv2
import numpy as np
import torch
from PIL import Image
def tensor2pil(image): def tensor2pil(image):
return Image.fromarray(np.clip(255. * image.cpu().numpy(), 0, 255).astype(np.uint8)) return Image.fromarray(np.clip(255. * image.cpu().numpy(), 0, 255).astype(np.uint8))
@ -73,60 +63,6 @@ def get_width_and_height_from_image_and_base_resolution(image, base_resolution):
height_slider = round(original_height * ratio) height_slider = round(original_height * ratio)
return height_slider, width_slider return height_slider, width_slider
def color_transfer(sc, dc):
    """
    Transfer the color distribution of the reference image dc onto sc.

    Both images are converted from RGB to LAB. Every LAB channel of sc is
    then shifted and scaled so that its mean and standard deviation match
    those of dc, and the result is converted back to RGB.

    Args:
        sc (numpy.ndarray): input image to be transferred.
        dc (numpy.ndarray): reference image.

    Returns:
        numpy.ndarray: sc with the color distribution of dc applied.
    """

    def get_mean_and_std(img):
        # Round the statistics to two decimals and flatten them with
        # np.hstack so they can be combined with the image arithmetically
        # (presumably one entry per channel -- confirm against cv2 docs).
        x_mean, x_std = cv2.meanStdDev(img)
        x_mean = np.hstack(np.around(x_mean, 2))
        x_std = np.hstack(np.around(x_std, 2))
        return x_mean, x_std

    sc = cv2.cvtColor(sc, cv2.COLOR_RGB2LAB)
    s_mean, s_std = get_mean_and_std(sc)
    dc = cv2.cvtColor(dc, cv2.COLOR_RGB2LAB)
    t_mean, t_std = get_mean_and_std(dc)

    # A constant source channel (or one whose std rounds to 0.00) would make
    # t_std / s_std divide by zero and fill the result with inf/NaN. Use a
    # neutral divisor of 1 there; (sc - s_mean) is ~0 for such a channel, so
    # the output simply lands on the reference mean.
    safe_s_std = np.where(s_std == 0, 1.0, s_std)

    img_n = ((sc - s_mean) * (t_std / safe_s_std)) + t_mean
    # Keep values inside the 0..255 range before converting back.
    img_n = np.clip(img_n, 0, 255)
    dst = cv2.cvtColor(cv2.convertScaleAbs(img_n), cv2.COLOR_LAB2RGB)
    return dst
def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=6, fps=12, imageio_backend=True, color_transfer_post_process=False):
    """
    Tile a batch of videos into one grid per time step and save it as a video/GIF.

    Args:
        videos (torch.Tensor): batch of videos laid out as (b, c, t, h, w).
        path (str): output file path. Missing parent directories are created.
        rescale (bool): if True, map values from [-1, 1] to [0, 1] before
            converting to 8-bit.
        n_rows (int): forwarded to torchvision.utils.make_grid as ``nrow``.
        fps (int): frame rate used by the imageio backend.
        imageio_backend (bool): write with imageio when True, otherwise
            write an animated GIF through PIL.
        color_transfer_post_process (bool): if True, match the colors of every
            frame after the first to the first frame via color_transfer.

    Returns:
        None. The result is written to ``path`` (or to the ``.gif``
        counterpart of an ``.mp4`` path when the PIL fallback is used).
    """
    # Iterate over time: each step yields the whole batch for one frame.
    videos = rearrange(videos, "b c t h w -> t b c h w")
    outputs = []
    for x in videos:
        x = torchvision.utils.make_grid(x, nrow=n_rows)
        # (c, h, w) -> (h, w, c) so PIL can consume the array.
        x = x.transpose(0, 1).transpose(1, 2).squeeze(-1)
        if rescale:
            x = (x + 1.0) / 2.0  # -1,1 -> 0,1
        x = (x * 255).numpy().astype(np.uint8)
        outputs.append(Image.fromarray(x))

    if color_transfer_post_process:
        # The first frame is the fixed color reference for all later frames.
        reference = np.uint8(outputs[0])
        for i in range(1, len(outputs)):
            outputs[i] = Image.fromarray(color_transfer(np.uint8(outputs[i]), reference))

    # os.path.dirname() is "" for a bare file name, and os.makedirs("")
    # raises, so only create the directory when there is one to create.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    if imageio_backend:
        if path.endswith("mp4"):
            imageio.mimsave(path, outputs, fps=fps)
        else:
            imageio.mimsave(path, outputs, duration=(1000 * 1/fps))
    else:
        if path.endswith("mp4"):
            path = path.replace('.mp4', '.gif')
        # append_images must hold only the frames AFTER the first one;
        # passing the full list would write the first frame twice.
        # NOTE(review): this branch uses a fixed 100 ms frame duration and
        # does not honor `fps` -- confirm whether that is intended.
        outputs[0].save(path, format='GIF', append_images=outputs[1:], save_all=True, duration=100, loop=0)
def get_image_to_video_latent(validation_image_start, validation_image_end, video_length, sample_size): def get_image_to_video_latent(validation_image_start, validation_image_end, video_length, sample_size):
if validation_image_start is not None and validation_image_end is not None: if validation_image_start is not None and validation_image_end is not None:
if type(validation_image_start) is str and os.path.isfile(validation_image_start): if type(validation_image_start) is str and os.path.isfile(validation_image_start):
@ -224,18 +160,7 @@ def get_image_to_video_latent(validation_image_start, validation_image_end, vide
return input_video, input_video_mask, clip_image return input_video, input_video_mask, clip_image
def get_video_to_video_latent(input_video_path, video_length, sample_size): def get_video_to_video_latent(input_video_path, video_length, sample_size):
if type(input_video_path) is str: input_video = input_video_path
cap = cv2.VideoCapture(input_video_path)
input_video = []
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.resize(frame, (sample_size[1], sample_size[0]))
input_video.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
cap.release()
else:
input_video = input_video_path
input_video = torch.from_numpy(np.array(input_video))[:video_length] input_video = torch.from_numpy(np.array(input_video))[:video_length]
input_video = input_video.permute([3, 0, 1, 2]).unsqueeze(0) / 255 input_video = input_video.permute([3, 0, 1, 2]).unsqueeze(0) / 255

View File

@ -0,0 +1,570 @@
{
"last_node_id": 48,
"last_link_id": 101,
"nodes": [
{
"id": 20,
"type": "CLIPLoader",
"pos": {
"0": -26,
"1": 400
},
"size": {
"0": 451.30548095703125,
"1": 82
},
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
"type": "CLIP",
"links": [
54,
56
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CLIPLoader"
},
"widgets_values": [
"t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
"sd3"
]
},
{
"id": 31,
"type": "CogVideoTextEncode",
"pos": {
"0": 497,
"1": 520
},
"size": {
"0": 463.01251220703125,
"1": 124
},
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 56
}
],
"outputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"links": [
86
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoTextEncode"
},
"widgets_values": [
"The video is not of a high quality, it has a low resolution. Watermark present in each frame. Strange motion trajectory. ",
1,
true
]
},
{
"id": 44,
"type": "VHS_VideoCombine",
"pos": {
"0": 1842,
"1": 345
},
"size": [
855.81494140625,
927.6441243489584
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 97
},
{
"name": "audio",
"type": "AUDIO",
"link": null
},
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null
},
{
"name": "vae",
"type": "VAE",
"link": null
}
],
"outputs": [
{
"name": "Filenames",
"type": "VHS_FILENAMES",
"links": null,
"shape": 3
}
],
"properties": {
"Node name for S&R": "VHS_VideoCombine"
},
"widgets_values": {
"frame_rate": 16,
"loop_count": 0,
"filename_prefix": "CogVideoX_Fun",
"format": "video/h264-mp4",
"pix_fmt": "yuv420p",
"crf": 19,
"save_metadata": true,
"pingpong": false,
"save_output": false,
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "CogVideoX_Fun_00012.mp4",
"subfolder": "",
"type": "temp",
"format": "video/h264-mp4",
"frame_rate": 16
},
"muted": false
}
}
},
{
"id": 11,
"type": "CogVideoDecode",
"pos": {
"0": 1448,
"1": 345
},
"size": {
"0": 300.396484375,
"1": 198
},
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "COGVIDEOPIPE",
"link": 89
},
{
"name": "samples",
"type": "LATENT",
"link": 88
}
],
"outputs": [
{
"name": "images",
"type": "IMAGE",
"links": [
97
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoDecode"
},
"widgets_values": [
true,
240,
360,
0.2,
0.2,
true
]
},
{
"id": 36,
"type": "LoadImage",
"pos": {
"0": 364,
"1": 715
},
"size": {
"0": 391.3421325683594,
"1": 456.8497009277344
},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
71
],
"slot_index": 0,
"shape": 3
},
{
"name": "MASK",
"type": "MASK",
"links": null,
"shape": 3
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"sd3stag.png",
"image"
]
},
{
"id": 37,
"type": "ImageResizeKJ",
"pos": {
"0": 824,
"1": 715
},
"size": {
"0": 315,
"1": 266
},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 71
},
{
"name": "get_image_size",
"type": "IMAGE",
"link": null
},
{
"name": "width_input",
"type": "INT",
"link": null,
"widget": {
"name": "width_input"
}
},
{
"name": "height_input",
"type": "INT",
"link": null,
"widget": {
"name": "height_input"
}
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
87
],
"slot_index": 0,
"shape": 3
},
{
"name": "width",
"type": "INT",
"links": null,
"shape": 3
},
{
"name": "height",
"type": "INT",
"links": null,
"shape": 3
}
],
"properties": {
"Node name for S&R": "ImageResizeKJ"
},
"widgets_values": [
720,
480,
"lanczos",
true,
16,
0,
0,
"disabled"
]
},
{
"id": 30,
"type": "CogVideoTextEncode",
"pos": {
"0": 493,
"1": 303
},
"size": {
"0": 471.90142822265625,
"1": 168.08047485351562
},
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 54
}
],
"outputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"links": [
85
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoTextEncode"
},
"widgets_values": [
"majestic stag grazing in a forest and basking in the setting sun",
1,
true
]
},
{
"id": 48,
"type": "DownloadAndLoadCogVideoGGUFModel",
"pos": {
"0": 584,
"1": 103
},
"size": {
"0": 378,
"1": 130
},
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "cogvideo_pipe",
"type": "COGVIDEOPIPE",
"links": [
101
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadCogVideoGGUFModel"
},
"widgets_values": [
"CogVideoX_5b_fun_GGUF_Q4_0.safetensors",
"bf16",
false,
"offload_device"
]
},
{
"id": 41,
"type": "CogVideoXFunSampler",
"pos": {
"0": 1058,
"1": 345
},
"size": {
"0": 315,
"1": 302
},
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "COGVIDEOPIPE",
"link": 101
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 85
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 86
},
{
"name": "start_img",
"type": "IMAGE",
"link": 87
},
{
"name": "end_img",
"type": "IMAGE",
"link": null
},
{
"name": "opt_empty_latent",
"type": "LATENT",
"link": null
}
],
"outputs": [
{
"name": "cogvideo_pipe",
"type": "COGVIDEOPIPE",
"links": [
89
],
"slot_index": 0,
"shape": 3
},
{
"name": "samples",
"type": "LATENT",
"links": [
88
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "CogVideoXFunSampler"
},
"widgets_values": [
49,
512,
44,
"fixed",
30,
6,
"CogVideoXDPMScheduler"
]
}
],
"links": [
[
54,
20,
0,
30,
0,
"CLIP"
],
[
56,
20,
0,
31,
0,
"CLIP"
],
[
71,
36,
0,
37,
0,
"IMAGE"
],
[
85,
30,
0,
41,
1,
"CONDITIONING"
],
[
86,
31,
0,
41,
2,
"CONDITIONING"
],
[
87,
37,
0,
41,
3,
"IMAGE"
],
[
88,
41,
1,
11,
1,
"LATENT"
],
[
89,
41,
0,
11,
0,
"COGVIDEOPIPE"
],
[
97,
11,
0,
44,
0,
"IMAGE"
],
[
101,
48,
0,
41,
0,
"COGVIDEOPIPE"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.7627768444385654,
"offset": [
62.58315607223924,
102.05205752424705
]
}
},
"version": 0.4
}

View File

@ -727,7 +727,8 @@ class CogVideoXFunSampler:
base_path = pipeline["base_path"] base_path = pipeline["base_path"]
assert "Fun" in base_path, "'Unfun' models not supported in 'CogVideoXFunSampler', use the 'CogVideoSampler'" assert "Fun" in base_path, "'Unfun' models not supported in 'CogVideoXFunSampler', use the 'CogVideoSampler'"
pipe.enable_model_cpu_offload(device=device) if not pipeline["cpu_offloading"]:
pipe.enable_model_cpu_offload(device=device)
mm.soft_empty_cache() mm.soft_empty_cache()