mirror of
https://git.datalinker.icu/kijai/ComfyUI-KJNodes.git
synced 2025-12-08 20:34:35 +08:00
Compare commits
5 Commits
5a655a9cae
...
c0368e2402
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c0368e2402 | ||
|
|
390d05fe7e | ||
|
|
f0ed965cd9 | ||
|
|
acdd16a973 | ||
|
|
c116d3396f |
@ -210,6 +210,8 @@ NODE_CONFIG = {
|
||||
"WanVideoNAG": {"class": WanVideoNAG, "name": "WanVideoNAG"},
|
||||
"GGUFLoaderKJ": {"class": GGUFLoaderKJ, "name": "GGUF Loader KJ"},
|
||||
"LatentInpaintTTM": {"class": LatentInpaintTTM, "name": "Latent Inpaint TTM"},
|
||||
"NABLA_AttentionKJ": {"class": NABLA_AttentionKJ, "name": "NABLA Attention KJ"},
|
||||
"TorchCompileModelAdvanced": {"class": TorchCompileModelAdvanced, "name": "TorchCompileModelAdvanced"},
|
||||
|
||||
#instance diffusion
|
||||
"CreateInstanceDiffusionTracking": {"class": CreateInstanceDiffusionTracking},
|
||||
|
||||
@ -3902,29 +3902,34 @@ class ImagePadKJ:
|
||||
class LoadVideosFromFolder:
|
||||
@classmethod
|
||||
def __init__(cls):
|
||||
cls.vhs_nodes = None
|
||||
vhs_pkg_name = "ComfyUI-VideoHelperSuite"
|
||||
vhs_pkg_name_lower = vhs_pkg_name.lower()
|
||||
vhs_pkg_name_suffix = vhs_pkg_name_lower.split("-")[-1]
|
||||
vhs_submodule_name = "videohelpersuite"
|
||||
try:
|
||||
cls.vhs_nodes = importlib.import_module("ComfyUI-VideoHelperSuite.videohelpersuite")
|
||||
cls.vhs_nodes = importlib.import_module(vhs_pkg_name+"."+vhs_submodule_name)
|
||||
except ImportError:
|
||||
try:
|
||||
cls.vhs_nodes = importlib.import_module("comfyui-videohelpersuite.videohelpersuite")
|
||||
cls.vhs_nodes = importlib.import_module(vhs_pkg_name_lower+"."+vhs_submodule_name)
|
||||
except ImportError:
|
||||
# Fallback to sys.modules search for Windows compatibility
|
||||
import sys
|
||||
vhs_module = None
|
||||
for module_name in sys.modules:
|
||||
if 'videohelpersuite' in module_name and 'videohelpersuite' in sys.modules[module_name].__dict__:
|
||||
if vhs_pkg_name_lower in module_name and vhs_submodule_name in sys.modules[module_name].__dict__:
|
||||
vhs_module = sys.modules[module_name]
|
||||
break
|
||||
|
||||
if vhs_module is None:
|
||||
# Try direct access to the videohelpersuite submodule
|
||||
for module_name in sys.modules:
|
||||
if module_name.endswith('videohelpersuite'):
|
||||
if module_name.endswith(vhs_pkg_name_suffix):
|
||||
vhs_module = sys.modules[module_name]
|
||||
break
|
||||
|
||||
if vhs_module is not None:
|
||||
cls.vhs_nodes = vhs_module
|
||||
cls.vhs_nodes = importlib.import_module(f"{vhs_module.__name__}.{vhs_submodule_name}")
|
||||
else:
|
||||
raise ImportError("This node requires ComfyUI-VideoHelperSuite to be installed.")
|
||||
|
||||
@ -3960,16 +3965,26 @@ class LoadVideosFromFolder:
|
||||
FUNCTION = "load_video"
|
||||
|
||||
def load_video(self, output_type, grid_max_columns, add_label=False, **kwargs):
|
||||
VIDEO_EXTS = ['webm', 'mp4', 'mkv', 'gif', 'mov']
|
||||
if self.vhs_nodes is None:
|
||||
raise ImportError("This node requires ComfyUI-VideoHelperSuite to be installed.")
|
||||
videos_list = []
|
||||
filenames = []
|
||||
for f in os.listdir(kwargs['video']):
|
||||
if os.path.isfile(os.path.join(kwargs['video'], f)):
|
||||
file_parts = f.split('.')
|
||||
if len(file_parts) > 1 and (file_parts[-1].lower() in ['webm', 'mp4', 'mkv', 'gif', 'mov']):
|
||||
videos_list.append(os.path.join(kwargs['video'], f))
|
||||
filenames.append(f)
|
||||
root = kwargs['video']
|
||||
pairs = []
|
||||
for f in os.listdir(root):
|
||||
full = os.path.join(root, f)
|
||||
# Skip non-files fast
|
||||
if not os.path.isfile(full):
|
||||
continue
|
||||
# Check extension
|
||||
ext = f.rsplit('.', 1)[-1].lower() if '.' in f else ''
|
||||
if ext in VIDEO_EXTS:
|
||||
pairs.append((full, f))
|
||||
def _natural_key(s):
|
||||
s = os.path.basename(s)
|
||||
return [int(t) if t.isdigit() else t.lower() for t in re.split(r'(\d+)', s)]
|
||||
pairs.sort(key=lambda x: _natural_key(x[1]))
|
||||
videos_list = [p[0] for p in pairs]
|
||||
filenames = [p[1] for p in pairs]
|
||||
print(videos_list)
|
||||
kwargs.pop('video')
|
||||
loaded_videos = []
|
||||
|
||||
@ -3,15 +3,17 @@ from comfy.ldm.modules import attention as comfy_attention
|
||||
import logging
|
||||
import torch
|
||||
import importlib
|
||||
import math
|
||||
|
||||
import folder_paths
|
||||
import comfy.model_management as mm
|
||||
from comfy.cli_args import args
|
||||
from comfy.ldm.modules.attention import wrap_attn
|
||||
from comfy.ldm.modules.attention import wrap_attn, optimized_attention
|
||||
import comfy.model_patcher
|
||||
import comfy.utils
|
||||
import comfy.sd
|
||||
|
||||
|
||||
try:
|
||||
from comfy_api.latest import io
|
||||
v3_available = True
|
||||
@ -71,6 +73,9 @@ def get_sage_func(sage_attention, allow_compile=False):
|
||||
|
||||
@wrap_attn
|
||||
def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False, **kwargs):
|
||||
in_dtype = v.dtype
|
||||
if q.dtype == torch.float32 or k.dtype == torch.float32 or v.dtype == torch.float32:
|
||||
q, k, v = q.to(torch.float16), k.to(torch.float16), v.to(torch.float16)
|
||||
if skip_reshape:
|
||||
b, _, _, dim_head = q.shape
|
||||
tensor_layout="HND"
|
||||
@ -89,7 +94,7 @@ def get_sage_func(sage_attention, allow_compile=False):
|
||||
# add a heads dimension if there isn't already one
|
||||
if mask.ndim == 3:
|
||||
mask = mask.unsqueeze(1)
|
||||
out = sage_func(q, k, v, attn_mask=mask, is_causal=False, tensor_layout=tensor_layout)
|
||||
out = sage_func(q, k, v, attn_mask=mask, is_causal=False, tensor_layout=tensor_layout).to(in_dtype)
|
||||
if tensor_layout == "HND":
|
||||
if not skip_output_reshape:
|
||||
out = (
|
||||
@ -675,6 +680,7 @@ class TorchCompileModelFluxAdvancedV2:
|
||||
try:
|
||||
if double_blocks:
|
||||
for i, block in enumerate(diffusion_model.double_blocks):
|
||||
print("Adding double block to compile list", i)
|
||||
compile_key_list.append(f"diffusion_model.double_blocks.{i}")
|
||||
if single_blocks:
|
||||
for i, block in enumerate(diffusion_model.single_blocks):
|
||||
@ -718,7 +724,7 @@ class TorchCompileModelHyVideo:
|
||||
}
|
||||
RETURN_TYPES = ("MODEL",)
|
||||
FUNCTION = "patch"
|
||||
|
||||
DEPRECATED = True
|
||||
CATEGORY = "KJNodes/torchcompile"
|
||||
EXPERIMENTAL = True
|
||||
|
||||
@ -850,7 +856,60 @@ class TorchCompileModelWanVideoV2:
|
||||
raise RuntimeError("Failed to compile model")
|
||||
|
||||
return (m, )
|
||||
|
||||
|
||||
|
||||
class TorchCompileModelAdvanced:
    """ComfyUI node that registers torch.compile wrappers on a cloned model.

    Either the individual entries of the known block containers found on the
    diffusion model are compiled, or (when none are found, or block-only
    compilation is disabled) the whole ``diffusion_model``.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": ("MODEL",),
                "backend": (["inductor","cudagraphs"], {"default": "inductor"}),
                "fullgraph": ("BOOLEAN", {"default": False, "tooltip": "Enable full graph mode"}),
                "mode": (["default", "max-autotune", "max-autotune-no-cudagraphs", "reduce-overhead"], {"default": "default"}),
                "dynamic": ("BOOLEAN", {"default": False, "tooltip": "Enable dynamic mode"}),
                "compile_transformer_blocks_only": ("BOOLEAN", {"default": True, "tooltip": "Compile only transformer blocks, faster compile and less error prone"}),
                "dynamo_cache_size_limit": ("INT", {"default": 64, "min": 0, "max": 1024, "step": 1, "tooltip": "torch._dynamo.config.cache_size_limit"}),
                "debug_compile_keys": ("BOOLEAN", {"default": False, "tooltip": "Print the compile keys used for torch.compile"}),
            },
        }

    RETURN_TYPES = ("MODEL",)
    FUNCTION = "patch"
    CATEGORY = "KJNodes/torchcompile"
    DESCRIPTION = "Advanced torch.compile patching for diffusion models."
    EXPERIMENTAL = True

    def patch(self, model, backend, fullgraph, mode, dynamic, dynamo_cache_size_limit, compile_transformer_blocks_only, debug_compile_keys):
        """Return a one-tuple holding a clone of ``model`` set up for torch.compile.

        Args:
            model: object providing ``clone()``; the clone must provide
                ``get_model_object("diffusion_model")``.
            backend, mode, dynamic, fullgraph: forwarded unchanged to
                ``set_torch_compile_wrapper``.
            dynamo_cache_size_limit: written to
                ``torch._dynamo.config.cache_size_limit`` (a process-wide setting).
            compile_transformer_blocks_only: when true, compile each entry of
                the known block containers instead of the whole model.
            debug_compile_keys: when true, log every collected compile key.

        Raises:
            RuntimeError: if collecting the keys or registering the wrapper
                fails; the underlying exception is chained as ``__cause__``.
        """
        from comfy_api.torch_helpers import set_torch_compile_wrapper
        m = model.clone()
        diffusion_model = m.get_model_object("diffusion_model")
        torch._dynamo.config.cache_size_limit = dynamo_cache_size_limit

        try:
            # Always defined, so the whole-model fallback below also works
            # when block-only compilation is switched off.
            compile_key_list = []
            if compile_transformer_blocks_only:
                layer_types = ["double_blocks", "single_blocks", "layers", "transformer_blocks", "blocks"]
                for layer_name in layer_types:
                    if hasattr(diffusion_model, layer_name):
                        blocks = getattr(diffusion_model, layer_name)
                        for i in range(len(blocks)):
                            compile_key_list.append(f"diffusion_model.{layer_name}.{i}")
                if not compile_key_list:
                    logging.warning("No known transformer blocks found to compile, compiling entire diffusion model instead")
                elif debug_compile_keys:
                    logging.info("TorchCompileModelAdvanced: Compile key list:")
                    for key in compile_key_list:
                        logging.info(f" - {key}")
            if not compile_key_list:
                compile_key_list = ["diffusion_model"]

            set_torch_compile_wrapper(model=m, keys=compile_key_list, backend=backend, mode=mode, dynamic=dynamic, fullgraph=fullgraph)
        except Exception as e:
            # Same exception type as before for callers, but the real cause is
            # chained explicitly and KeyboardInterrupt/SystemExit pass through.
            raise RuntimeError("Failed to compile model") from e

        return (m, )
|
||||
|
||||
|
||||
class TorchCompileModelQwenImage:
|
||||
@classmethod
|
||||
def INPUT_TYPES(s):
|
||||
@ -2005,3 +2064,126 @@ else:
|
||||
FUNCTION = ""
|
||||
CATEGORY = ""
|
||||
DESCRIPTION = "This node requires newer ComfyUI"
|
||||
|
||||
|
||||
# flex_attention is an optional dependency: when it cannot be imported both
# names are set to None so this module still loads, and
# NABLA_AttentionKJ.patch raises a descriptive RuntimeError instead.
try:
    from torch.nn.attention.flex_attention import flex_attention, BlockMask
except Exception:
    # Deliberately broad (best-effort import), but unlike a bare ``except:``
    # this lets KeyboardInterrupt / SystemExit propagate.
    flex_attention = None
    BlockMask = None
|
||||
|
||||
class NABLA_AttentionKJ():
    """ComfyUI node that installs a NABLA sparse-attention override on a model clone."""

    RETURN_TYPES = ("MODEL", )
    FUNCTION = "patch"
    DESCRIPTION = "Experimental node for patching attention mode to use NABLA sparse attention for video models, currently only works with Kadinsky5"
    CATEGORY = "KJNodes/experimental"

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node inputs; every input is required."""
        required = {
            "model": ("MODEL",),
            "latent": ("LATENT", {"tooltip": "Only used to get the latent shape"}),
            "window_time": ("INT", {"default": 11, "min": 1, "tooltip": "Temporal attention window size"}),
            "window_width": ("INT", {"default": 3, "min": 1, "tooltip": "Spatial attention window size"}),
            "window_height": ("INT", {"default": 3, "min": 1, "tooltip": "Spatial attention window size"}),
            "sparsity": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 1.0, "step": 0.01}),
            "torch_compile": ("BOOLEAN", {"default": True, "tooltip": "Most likely required for reasonable memory usage"}),
        }
        return {"required": required}

    def patch(self, model, latent, window_time, window_width, window_height, sparsity, torch_compile):
        """Clone ``model`` and register NABLA attention as its attention override.

        The sparse parameters are derived from ``latent["samples"]`` and the
        window sizes. The override is stored under
        ``model_options["transformer_options"]["optimized_attention_override"]``
        of the clone, optionally wrapped in ``torch.compile``.

        Returns:
            A one-tuple containing the patched clone.

        Raises:
            RuntimeError: if flex_attention / BlockMask could not be imported.
        """
        if flex_attention is None or BlockMask is None:
            raise RuntimeError("can't import flex_attention from torch.nn.attention, requires newer pytorch version")

        model_clone = model.clone()
        nabla_attention = NABLA_Attention(
            get_sparse_params(latent["samples"], window_time, window_height, window_width, sparsity)
        )

        def attention_override_nabla(func, *args, **kwargs):
            # The attention function handed in as ``func`` is intentionally
            # ignored; NABLA attention is called with the remaining arguments.
            return nabla_attention(*args, **kwargs)

        override = attention_override_nabla
        if torch_compile:
            override = torch.compile(attention_override_nabla, mode="max-autotune-no-cudagraphs", dynamic=True)

        # attention override
        transformer_options = model_clone.model_options["transformer_options"]
        transformer_options["optimized_attention_override"] = override

        return (model_clone,)
|
||||
|
||||
|
||||
class NABLA_Attention():
|
||||
def __init__(self, sparse_params):
|
||||
self.sparse_params = sparse_params
|
||||
|
||||
def __call__(self, q, k, v, heads, **kwargs):
|
||||
if q.shape[-2] < 3000 or k.shape[-2] < 3000:
|
||||
return optimized_attention(q, k, v, heads, **kwargs)
|
||||
block_mask = self.nablaT_v2(q, k, self.sparse_params["sta_mask"], thr=self.sparse_params["P"])
|
||||
out = flex_attention(q, k, v, block_mask=block_mask).transpose(1, 2).contiguous().flatten(-2, -1)
|
||||
return out
|
||||
|
||||
def nablaT_v2(self, q, k, sta, thr=0.9):
|
||||
# Map estimation
|
||||
BLOCK_SIZE = 64
|
||||
B, h, S, D = q.shape
|
||||
s1 = S // BLOCK_SIZE
|
||||
qa = q.reshape(B, h, s1, BLOCK_SIZE, D).mean(-2)
|
||||
ka = k.reshape(B, h, s1, BLOCK_SIZE, D).mean(-2).transpose(-2, -1)
|
||||
map = qa @ ka
|
||||
|
||||
map = torch.softmax(map / math.sqrt(D), dim=-1)
|
||||
# Map binarization
|
||||
vals, inds = map.sort(-1)
|
||||
cvals = vals.cumsum_(-1)
|
||||
mask = (cvals >= 1 - thr).int()
|
||||
mask = mask.gather(-1, inds.argsort(-1))
|
||||
|
||||
mask = torch.logical_or(mask, sta)
|
||||
|
||||
# BlockMask creation
|
||||
kv_nb = mask.sum(-1).to(torch.int32)
|
||||
kv_inds = mask.argsort(dim=-1, descending=True).to(torch.int32)
|
||||
return BlockMask.from_kv_blocks(torch.zeros_like(kv_nb), kv_inds, kv_nb, kv_inds, BLOCK_SIZE=BLOCK_SIZE, mask_mod=None)
|
||||
|
||||
def fast_sta_nabla(T, H, W, wT=3, wH=3, wW=3, device=None):
    """Build a boolean (T*H*W, T*H*W) window mask over a T x H x W grid.

    Entry ``[i, j]`` is True when positions ``i`` and ``j`` (flattened in
    T, H, W order) differ by at most ``wT // 2``, ``wH // 2`` and ``wW // 2``
    along the respective axes.

    Args:
        T, H, W: integer grid sizes.
        wT, wH, wW: window sizes along each axis.
        device: device for the mask; defaults to ``mm.get_torch_device()``.

    Returns:
        A bool tensor of shape ``(T * H * W, T * H * W)``.
    """
    if device is None:
        device = mm.get_torch_device()
    # Plain Python max: no temporary float tensor just to compare three ints.
    longest = max(T, H, W)
    r = torch.arange(0, longest, 1, dtype=torch.int16, device=device)
    # Pairwise |i - j| table, sliced per axis below.
    mat = (r.unsqueeze(1) - r.unsqueeze(0)).abs()
    sta_t = mat[:T, :T].flatten() <= wT // 2
    sta_h = mat[:H, :H].flatten() <= wH // 2
    sta_w = mat[:W, :W].flatten() <= wW // 2
    # Outer product over (h1, h2) x (w1, w2), reordered to (h1, w1, h2, w2).
    sta_hw = (sta_h.unsqueeze(1) * sta_w.unsqueeze(0)).reshape(H, H, W, W).transpose(1, 2).flatten()
    # Same construction with time: (t1, t2) x (hw1, hw2) -> (t1, hw1, t2, hw2).
    sta = (sta_t.unsqueeze(1) * sta_hw.unsqueeze(0)).reshape(T, T, H * W, H * W).transpose(1, 2)
    return sta.reshape(T * H * W, T * H * W)
|
||||
|
||||
|
||||
def get_sparse_params(x, wT, wH, wW, sparsity=0.9):
    """Assemble the parameter dict consumed by NABLA attention.

    Args:
        x: tensor whose shape unpacks into five dims, read as (B, C, T, H, W).
        wT, wH, wW: window sizes forwarded to ``fast_sta_nabla``.
        sparsity: stored under ``"P"``.

    Returns:
        dict with the window mask under ``"sta_mask"`` (two leading singleton
        dims added), the patch-divided ``"visual_shape"`` and the settings.
    """
    _, _, T, H, W = x.shape
    # Debug-level log instead of printing to stdout on every call.
    logging.debug("x shape: %s", x.shape)
    patch_size = (1, 2, 2)
    T, H, W = (
        T // patch_size[0],
        H // patch_size[1],
        W // patch_size[2],
    )
    # NOTE(review): the extra // 8 on H and W presumably maps positions onto
    # 8x8 = 64-element blocks matching the attention BLOCK_SIZE - confirm.
    sta_mask = fast_sta_nabla(T, H // 8, W // 8, wT, wH, wW)
    sparse_params = {
        "sta_mask": sta_mask[None, None],
        "to_fractal": True,
        "P": sparsity,
        "wT": wT,
        "wH": wH,
        "wW": wW,
        "add_sta": True,
        "visual_shape": (T, H, W),
        "method": "topcdf",
    }

    return sparse_params
|
||||
Loading…
x
Reference in New Issue
Block a user