Mirror of https://git.datalinker.icu/kijai/ComfyUI-KJNodes.git
Synced 2025-12-09 04:44:30 +08:00

Compare commits: 5 commits (13cb145177 ... 78db78ab28)
| Author | SHA1 | Date |
|---|---|---|
| | 78db78ab28 | |
| | 50e7dd34d3 | |
| | 37206374ef | |
| | cb820f0249 | |
| | 95bc77f855 | |
```diff
@@ -212,6 +212,9 @@ NODE_CONFIG = {
     "LatentInpaintTTM": {"class": LatentInpaintTTM, "name": "Latent Inpaint TTM"},
     "NABLA_AttentionKJ": {"class": NABLA_AttentionKJ, "name": "NABLA Attention KJ"},
     "TorchCompileModelAdvanced": {"class": TorchCompileModelAdvanced, "name": "TorchCompileModelAdvanced"},
+    "StartRecordCUDAMemoryHistory": {"class": StartRecordCUDAMemoryHistory, "name": "Start Recording CUDAMemory History"},
+    "EndRecordCUDAMemoryHistory": {"class": EndRecordCUDAMemoryHistory, "name": "End Recording CUDAMemory History"},
+    "VisualizeCUDAMemoryHistory": {"class": VisualizeCUDAMemoryHistory, "name": "Visualize CUDAMemory History"},
 
     #instance diffusion
     "CreateInstanceDiffusionTracking": {"class": CreateInstanceDiffusionTracking},
```
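For orientation: ComfyUI discovers custom nodes through the module-level `NODE_CLASS_MAPPINGS` and `NODE_DISPLAY_NAME_MAPPINGS` dicts, and a registry table like `NODE_CONFIG` is typically flattened into those two mappings at import time. A minimal sketch of that pattern — the helper name `generate_node_mappings` and the stub class are illustrative, not necessarily this repo's exact code:

```python
# Sketch: flattening a NODE_CONFIG-style registry into the two dicts
# ComfyUI actually reads at import time. Names below are stand-ins.

class LatentInpaintTTM:  # placeholder for the real node class
    pass

NODE_CONFIG = {
    "LatentInpaintTTM": {"class": LatentInpaintTTM, "name": "Latent Inpaint TTM"},
    "CreateInstanceDiffusionTracking": {"class": LatentInpaintTTM},  # no display name
}

def generate_node_mappings(node_config):
    class_mappings = {}
    display_mappings = {}
    for node_id, cfg in node_config.items():
        class_mappings[node_id] = cfg["class"]
        # Fall back to the node id when no "name" is given, as with the
        # instance-diffusion entries in the hunk above.
        display_mappings[node_id] = cfg.get("name", node_id)
    return class_mappings, display_mappings

NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS = generate_node_mappings(NODE_CONFIG)
```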
```diff
@@ -2156,8 +2156,8 @@ class ReplaceImagesInBatch:
     FUNCTION = "replace"
     CATEGORY = "KJNodes/image"
     DESCRIPTION = """
-Replaces the images in a batch, starting from the specified start index,
-with the replacement images.
+Replaces the images in a batch, starting from the specified start index with step stride,
+using the replacement images.
 """
 
     @classmethod
@@ -2165,6 +2165,7 @@ with the replacement images.
         return {
             "required": {
                 "start_index": ("INT", {"default": 1,"min": 0, "max": 4096, "step": 1}),
+                "step": ("INT", {"default": 1,"min": 1, "max": 4096, "step": 1}),
             },
             "optional": {
                 "original_images": ("IMAGE",),
@@ -2174,14 +2175,14 @@ with the replacement images.
             }
         }
 
-    def replace(self, original_images=None, replacement_images=None, start_index=1, original_masks=None, replacement_masks=None):
+    def replace(self, original_images=None, replacement_images=None, start_index=1, step=1, original_masks=None, replacement_masks=None):
         images = None
         masks = None
 
         if original_images is not None and replacement_images is not None:
             if start_index >= len(original_images):
                 raise ValueError("ReplaceImagesInBatch: Start index is out of range")
-            end_index = start_index + len(replacement_images)
+            end_index = start_index + len(replacement_images) * step
             if end_index > len(original_images):
                 raise ValueError("ReplaceImagesInBatch: End index is out of range")
 
@@ -2189,7 +2190,7 @@ with the replacement images.
             if original_images_copy.shape[2] != replacement_images.shape[2] or original_images_copy.shape[3] != replacement_images.shape[3]:
                 replacement_images = common_upscale(replacement_images.movedim(-1, 1), original_images_copy.shape[1], original_images_copy.shape[2], "lanczos", "center").movedim(1, -1)
 
-            original_images_copy[start_index:end_index] = replacement_images
+            original_images_copy[start_index:end_index:step] = replacement_images
             images = original_images_copy
         else:
             images = torch.zeros((1, 64, 64, 3))
@@ -2197,7 +2198,7 @@ with the replacement images.
         if original_masks is not None and replacement_masks is not None:
             if start_index >= len(original_masks):
                 raise ValueError("ReplaceImagesInBatch: Start index is out of range")
-            end_index = start_index + len(replacement_masks)
+            end_index = start_index + len(replacement_masks) * step
             if end_index > len(original_masks):
                 raise ValueError("ReplaceImagesInBatch: End index is out of range")
 
@@ -2205,7 +2206,7 @@ with the replacement images.
             if original_masks_copy.shape[1] != replacement_masks.shape[1] or original_masks_copy.shape[2] != replacement_masks.shape[2]:
                 replacement_masks = common_upscale(replacement_masks.unsqueeze(1), original_masks_copy.shape[1], original_masks_copy.shape[2], "nearest-exact", "center").squeeze(0)
 
-            original_masks_copy[start_index:end_index] = replacement_masks
+            original_masks_copy[start_index:end_index:step] = replacement_masks
             masks = original_masks_copy
         else:
             masks = torch.zeros((1, 64, 64))
```
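The new `step` input works through Python's extended slice assignment: `batch[start:end:step] = replacement` writes one replacement frame at every `step`-th index, which is why `end_index` becomes `start_index + len(replacement_images) * step` (the bound check is slightly conservative, since the last index actually written is `end_index - step`). A standalone illustration of the semantics, not repo code:

```python
import torch

batch = torch.zeros(8, 4, 4, 3)        # 8-frame batch in ComfyUI's (N, H, W, C) layout
replacement = torch.ones(3, 4, 4, 3)   # 3 replacement frames

start, step = 1, 2
end = start + len(replacement) * step  # 1 + 3 * 2 = 7

batch[start:end:step] = replacement    # overwrites frames 1, 3 and 5

print(batch[:, 0, 0, 0])               # tensor([0., 1., 0., 1., 0., 1., 0., 0.])
```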
```diff
@@ -4,6 +4,7 @@ import logging
 import torch
 import importlib
 import math
+import datetime
 
 import folder_paths
 import comfy.model_management as mm
@@ -2103,7 +2104,7 @@ class NABLA_AttentionKJ():
 
         def attention_override_nabla(func, *args, **kwargs):
             return nabla_attention(*args, **kwargs)
 
         if torch_compile:
             attention_override_nabla = torch.compile(attention_override_nabla, mode="max-autotune-no-cudagraphs", dynamic=True)
 
```
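Worth noting in the hunk above: the attention override is optionally wrapped in `torch.compile` with `dynamic=True`, which asks the compiler for shape-polymorphic kernels so the function is not recompiled every time the token count changes, while `"max-autotune-no-cudagraphs"` autotunes kernels without CUDA graph capture. A minimal sketch of the same wrapping, with plain SDPA standing in for `nabla_attention`:

```python
import torch
import torch.nn.functional as F

def attention_override(func, *args, **kwargs):
    # Same call shape as the override in the hunk: ignore the original
    # function and dispatch to a replacement kernel (SDPA as a stand-in).
    return F.scaled_dot_product_attention(*args, **kwargs)

# dynamic=True avoids recompiles across varying sequence lengths;
# "max-autotune-no-cudagraphs" autotunes without CUDA graph capture.
compiled = torch.compile(attention_override, mode="max-autotune-no-cudagraphs", dynamic=True)

q = k = v = torch.randn(1, 8, 256, 64)
out = compiled(None, q, k, v)
```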
```diff
@@ -2146,7 +2147,7 @@ class NABLA_Attention():
     kv_nb = mask.sum(-1).to(torch.int32)
     kv_inds = mask.argsort(dim=-1, descending=True).to(torch.int32)
     return BlockMask.from_kv_blocks(torch.zeros_like(kv_nb), kv_inds, kv_nb, kv_inds, BLOCK_SIZE=BLOCK_SIZE, mask_mod=None)
 
 def fast_sta_nabla(T, H, W, wT=3, wH=3, wW=3):
     l = torch.Tensor([T, H, W]).amax()
     r = torch.arange(0, l, 1, dtype=torch.int16, device=mm.get_torch_device())
```
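The `BlockMask` construction in this hunk follows PyTorch's flex_attention API: starting from a boolean block-level mask, `mask.sum(-1)` counts the active KV blocks per query block and `argsort(descending=True)` moves their indices to the front; passing zeros as the partial-block count and `kv_nb`/`kv_inds` as the full-block arguments marks every selected block as fully attended, so no `mask_mod` needs to run. A standalone sketch of the same conversion, assuming a recent PyTorch with flex_attention and illustrative shapes:

```python
import torch
from torch.nn.attention.flex_attention import BlockMask

BLOCK_SIZE = 128
# Boolean block-level mask: (batch, heads, q_blocks, kv_blocks)
mask = torch.rand(1, 8, 16, 16) > 0.5

kv_nb = mask.sum(-1).to(torch.int32)                             # active kv blocks per q block
kv_inds = mask.argsort(dim=-1, descending=True).to(torch.int32)  # active indices first

block_mask = BlockMask.from_kv_blocks(
    torch.zeros_like(kv_nb),   # partial ("masked") block count: none
    kv_inds,
    kv_nb,                     # full block count: every listed block is dense
    kv_inds,
    BLOCK_SIZE=BLOCK_SIZE,
    mask_mod=None,
)
print(block_mask.shape)
```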
```diff
@@ -2166,7 +2167,7 @@ def fast_sta_nabla(T, H, W, wT=3, wH=3, wW=3):
 
 def get_sparse_params(x, wT, wH, wW, sparsity=0.9):
     B, C, T, H, W = x.shape
-    print("x shape:", x.shape)
+    #print("x shape:", x.shape)
     patch_size = (1, 2, 2)
     T, H, W = (
         T // patch_size[0],
@@ -2186,4 +2187,119 @@ def get_sparse_params(x, wT, wH, wW, sparsity=0.9):
         "method": "topcdf",
     }
 
     return sparse_params
+
+
+from comfy.comfy_types.node_typing import IO
+
+class StartRecordCUDAMemoryHistory():
+    # @classmethod
+    # def IS_CHANGED(s):
+    #     return True
+
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "input": (IO.ANY,),
+                "enabled": (["all", "state", "None"], {"default": "all", "tooltip": "None: disable, 'state': keep info for allocated memory, 'all': keep history of all alloc/free calls"}),
+                "context": (["all", "state", "alloc", "None"], {"default": "all", "tooltip": "None: no tracebacks, 'state': tracebacks for allocated memory, 'alloc': for alloc calls, 'all': for free calls"}),
+                "stacks": (["python", "all"], {"default": "all", "tooltip": "'python': Python/TorchScript/inductor frames, 'all': also C++ frames"}),
+                "max_entries": ("INT", {"default": 100000, "min": 1000, "max": 10000000, "tooltip": "Maximum number of entries to record"}),
+            },
+        }
+
+    RETURN_TYPES = (IO.ANY,)
+    RETURN_NAMES = ("input",)
+    FUNCTION = "start"
+    CATEGORY = "KJNodes/experimental"
+    DESCRIPTION = "THIS NODE ALWAYS RUNS. Starts recording CUDA memory allocation history; can be ended and saved with EndRecordCUDAMemoryHistory."
+
+    def start(self, input, enabled, context, stacks, max_entries):
+        mm.soft_empty_cache()
+        torch.cuda.reset_peak_memory_stats(mm.get_torch_device())
+        torch.cuda.memory._record_memory_history(
+            max_entries=max_entries,
+            enabled=enabled if enabled != "None" else None,
+            context=context if context != "None" else None,
+            stacks=stacks
+        )
+        return input,
+
+class EndRecordCUDAMemoryHistory():
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+            "input": (IO.ANY,),
+            "output_path": ("STRING", {"default": "comfy_cuda_memory_history", "tooltip": "Base path for saving the CUDA memory history file; a timestamp and .pt extension will be added"}),
+            },
+        }
+
+    RETURN_TYPES = (IO.ANY, "STRING",)
+    RETURN_NAMES = ("input", "output_path",)
+    FUNCTION = "end"
+    CATEGORY = "KJNodes/experimental"
+    DESCRIPTION = "Records CUDA memory allocation history between start and end, saves to a file that can be analyzed at https://docs.pytorch.org/memory_viz or with the VisualizeCUDAMemoryHistory node"
+
+    def end(self, input, output_path):
+        mm.soft_empty_cache()
+        time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        output_path = f"{output_path}{time}.pt"
+        torch.cuda.memory._dump_snapshot(output_path)
+        torch.cuda.memory._record_memory_history(enabled=None)
+        return input, output_path
+
+
+try:
+    from server import PromptServer
+except Exception:
+    PromptServer = None
+
+class VisualizeCUDAMemoryHistory():
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+            "snapshot_path": ("STRING", ),
+            },
+            "hidden": {
+                "unique_id": "UNIQUE_ID",
+            },
+        }
+
+    RETURN_TYPES = ("STRING",)
+    RETURN_NAMES = ("output_path",)
+    FUNCTION = "visualize"
+    CATEGORY = "KJNodes/experimental"
+    DESCRIPTION = "Visualizes a CUDA memory allocation history file, opens in browser"
+    OUTPUT_NODE = True
+
+    def visualize(self, snapshot_path, unique_id):
+        import pickle
+        from torch.cuda import _memory_viz
+        import uuid
+
+        from folder_paths import get_output_directory
+        output_dir = get_output_directory()
+
+        # Load the pickled snapshot produced by torch.cuda.memory._dump_snapshot
+        with open(snapshot_path, "rb") as f:
+            snapshot = pickle.load(f)
+
+        # Render an interactive HTML timeline of the allocation trace
+        html = _memory_viz.trace_plot(snapshot)
+        html_filename = f"cuda_memory_history_{uuid.uuid4().hex}.html"
+        output_path = os.path.join(output_dir, "memory_history", html_filename)
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
+
+        with open(output_path, "w", encoding="utf-8") as f:
+            f.write(html)
+
+        api_url = f"http://localhost:8188/api/view?type=output&filename={html_filename}&subfolder=memory_history"
+
+        # Progress UI
+        if unique_id and PromptServer is not None:
+            try:
+                PromptServer.instance.send_progress_text(
+                    api_url,
+                    unique_id
+                )
+            except Exception:
+                pass
+
+        return api_url,
```
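For readers who want the same trace outside ComfyUI: the three nodes above are thin wrappers around PyTorch's memory introspection API — `torch.cuda.memory._record_memory_history`, `_dump_snapshot`, and `torch.cuda._memory_viz.trace_plot` — which is private, so signatures may shift between releases. A minimal end-to-end sketch under those assumptions (requires a CUDA device):

```python
import pickle
import torch
from torch.cuda import _memory_viz

device = torch.device("cuda")

# Start recording: keep the full alloc/free history with tracebacks.
torch.cuda.memory._record_memory_history(
    max_entries=100000, enabled="all", context="all", stacks="all"
)

# ... run the workload to profile ...
x = torch.randn(4096, 4096, device=device)
y = x @ x
del x, y

# Dump the snapshot (a pickled dict), then stop recording.
torch.cuda.memory._dump_snapshot("cuda_memory_history.pt")
torch.cuda.memory._record_memory_history(enabled=None)

# Render the snapshot to a self-contained HTML timeline — the same
# view that https://docs.pytorch.org/memory_viz provides in-browser.
with open("cuda_memory_history.pt", "rb") as f:
    snapshot = pickle.load(f)
with open("cuda_memory_history.html", "w", encoding="utf-8") as f:
    f.write(_memory_viz.trace_plot(snapshot))
```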