From ab8cac5396db7caffb3e73ca171a3684c8daea2d Mon Sep 17 00:00:00 2001
From: kijai <40791699+kijai@users.noreply.github.com>
Date: Wed, 16 Jul 2025 14:02:31 +0300
Subject: [PATCH] Add LoraExtractKJ

Improved Lora extraction node
- built-in diff subtraction
- lowrank algo for quick extraction
- dtype selection
---
 __init__.py         |   4 ++
 nodes/lora_nodes.py | 181 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 185 insertions(+)
 create mode 100644 nodes/lora_nodes.py

diff --git a/__init__.py b/__init__.py
index 677d6a3..3a950fc 100644
--- a/__init__.py
+++ b/__init__.py
@@ -6,6 +6,7 @@ from .nodes.image_nodes import *
 from .nodes.intrinsic_lora_nodes import *
 from .nodes.mask_nodes import *
 from .nodes.model_optimization_nodes import *
+from .nodes.lora_nodes import *
 NODE_CONFIG = {
     #constants
     "BOOLConstant": {"class": BOOLConstant, "name": "BOOL Constant"},
@@ -203,6 +204,9 @@ NODE_CONFIG = {
     "CreateInstanceDiffusionTracking": {"class": CreateInstanceDiffusionTracking},
     "AppendInstanceDiffusionTracking": {"class": AppendInstanceDiffusionTracking},
     "DrawInstanceDiffusionTracking": {"class": DrawInstanceDiffusionTracking},
+
+    #lora
+    "LoraExtractKJ": {"class": LoraExtractKJ, "name": "LoraExtractKJ"},
 }
 
 def generate_node_mappings(node_config):
diff --git a/nodes/lora_nodes.py b/nodes/lora_nodes.py
new file mode 100644
index 0000000..ab9fcfb
--- /dev/null
+++ b/nodes/lora_nodes.py
@@ -0,0 +1,181 @@
+import torch
+import comfy.model_management
+import comfy.utils
+import folder_paths
+import os
+import logging
+from enum import Enum
+from tqdm import tqdm
+
+CLAMP_QUANTILE = 0.99
+
+def extract_lora(diff, rank, algorithm, lowrank_iters=7):
+    """Factor a weight difference into a low-rank (U, Vh) pair.
+
+    A 4D diff is flattened from dim 1 onward for the decomposition and the
+    factors are reshaped back to 4D before returning. rank is capped at
+    min(in_dim, out_dim). algorithm "svd_lowrank" uses torch.svd_lowrank
+    with lowrank_iters iterations; any other value uses torch.linalg.svd.
+    The singular values are folded into U, and both factors are clamped to
+    +/- the CLAMP_QUANTILE quantile of their combined values.
+    """
+    conv2d = (len(diff.shape) == 4)
+    kernel_size = None if not conv2d else diff.size()[2:4]
+    out_dim, in_dim = diff.size()[0:2]
+    rank = min(rank, in_dim, out_dim)
+
+    if conv2d:
+        # flatten keeps dims 0 and 1 apart even when one of them is 1;
+        # squeeze() dropped those too and broke such 1x1 kernels
+        diff = diff.flatten(start_dim=1)
+
+    diff_float = diff.float()
+
+    if algorithm == "svd_lowrank":
+        U, S, V = torch.svd_lowrank(diff_float, q=rank, niter=lowrank_iters)
+        U = U @ torch.diag(S)
+        Vh = V.t()
+    else:
+        # only the first `rank` components are kept, so the reduced SVD is
+        # enough and avoids building the full square U / Vh
+        U, S, Vh = torch.linalg.svd(diff_float, full_matrices=False)
+        U = U[:, :rank]
+        S = S[:rank]
+        U = U @ torch.diag(S)
+        Vh = Vh[:rank, :]
+
+    dist = torch.cat([U.flatten(), Vh.flatten()])
+    hi_val = torch.quantile(dist, CLAMP_QUANTILE)
+    low_val = -hi_val
+
+    U = U.clamp(low_val, hi_val)
+    Vh = Vh.clamp(low_val, hi_val)
+    if conv2d:
+        U = U.reshape(out_dim, rank, 1, 1)
+        Vh = Vh.reshape(rank, in_dim, kernel_size[0], kernel_size[1])
+    return (U, Vh)
+
+class LORAType(Enum):
+    STANDARD = 0
+    FULL_DIFF = 1
+
+LORA_TYPES = {"standard": LORAType.STANDARD,
+              "full_diff": LORAType.FULL_DIFF}
+
+def calc_lora_model(model_diff, rank, prefix_model, prefix_lora, output_sd, lora_type, algorithm, lowrank_iters, out_dtype, bias_diff=False):
+    """Fill output_sd with LoRA tensors extracted from model_diff.
+
+    Walks the state dict keys under prefix_model. ".weight" tensors become
+    lora_up/lora_down pairs (STANDARD) or a ".diff" entry (FULL_DIFF); 5D
+    weights are skipped. With bias_diff, ".bias" tensors are stored as
+    ".diff_b" and, in STANDARD mode, weights with fewer than 2 dims as
+    ".diff". Stored tensors are cast to out_dtype and moved to the CPU.
+    Returns output_sd.
+    """
+    comfy.model_management.load_models_gpu([model_diff], force_patch_weights=True)
+    sd = model_diff.model_state_dict(filter_prefix=prefix_model)
+
+    # Get total number of keys to process for progress bar
+    total_keys = len([k for k in sd if k.endswith(".weight") or (bias_diff and k.endswith(".bias"))])
+
+    # Create progress bar
+    progress_bar = tqdm(total=total_keys, desc=f"Extracting LoRA ({prefix_lora.strip('.')})")
+    comfy_pbar = comfy.utils.ProgressBar(total_keys)
+
+    for k in sd:
+        if k.endswith(".weight"):
+            weight_diff = sd[k]
+            if weight_diff.ndim == 5:
+                logging.info(f"Skipping 5D tensor for key {k}") #skip patch embed
+                progress_bar.update(1)
+                comfy_pbar.update(1)
+                continue
+            if lora_type == LORAType.STANDARD:
+                if weight_diff.ndim < 2:
+                    if bias_diff:
+                        output_sd["{}{}.diff".format(prefix_lora, k[len(prefix_model):-7])] = weight_diff.contiguous().to(out_dtype).cpu()
+                    progress_bar.update(1)
+                    comfy_pbar.update(1)
+                    continue
+                try:
+                    out = extract_lora(weight_diff, rank, algorithm, lowrank_iters)
+                    output_sd["{}{}.lora_up.weight".format(prefix_lora, k[len(prefix_model):-7])] = out[0].contiguous().to(out_dtype).cpu()
+                    output_sd["{}{}.lora_down.weight".format(prefix_lora, k[len(prefix_model):-7])] = out[1].contiguous().to(out_dtype).cpu()
+                except Exception as e:
+                    # best effort: a failed decomposition only skips this key
+                    logging.warning("Could not generate lora weights for key {}, error {}".format(k, e))
+            elif lora_type == LORAType.FULL_DIFF:
+                output_sd["{}{}.diff".format(prefix_lora, k[len(prefix_model):-7])] = weight_diff.contiguous().to(out_dtype).cpu()
+
+            progress_bar.update(1)
+            comfy_pbar.update(1)
+
+        elif bias_diff and k.endswith(".bias"):
+            output_sd["{}{}.diff_b".format(prefix_lora, k[len(prefix_model):-5])] = sd[k].contiguous().to(out_dtype).cpu()
+            progress_bar.update(1)
+            comfy_pbar.update(1)
+    progress_bar.close()
+    return output_sd
+
+class LoraExtractKJ:
+    """Output node that saves the difference between two models as a LoRA file."""
+    def __init__(self):
+        self.output_dir = folder_paths.get_output_directory()
+
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required":
+                    {
+                    "finetuned_model": ("MODEL",),
+                    "original_model": ("MODEL",),
+                    "filename_prefix": ("STRING", {"default": "loras/ComfyUI_extracted_lora"}),
+                    "rank": ("INT", {"default": 8, "min": 1, "max": 4096, "step": 1}),
+                    "lora_type": (tuple(LORA_TYPES.keys()),),
+                    "algorithm": (["svd_linalg", "svd_lowrank"], {"default": "svd_linalg", "tooltip": "SVD algorithm to use, svd_lowrank is faster but less accurate."}),
+                    "lowrank_iters": ("INT", {"default": 7, "min": 1, "max": 100, "step": 1, "tooltip": "The number of subspace iterations for lowrank SVD algorithm."}),
+                    "output_dtype": (["fp16", "bf16", "fp32"], {"default": "fp16"}),
+                    "bias_diff": ("BOOLEAN", {"default": True}),
+                    },
+
+        }
+    RETURN_TYPES = ()
+    FUNCTION = "save"
+    OUTPUT_NODE = True
+
+    CATEGORY = "KJNodes/lora"
+
+    def save(self, finetuned_model, original_model, filename_prefix, rank, lora_type, algorithm, lowrank_iters, output_dtype, bias_diff):
+        """Subtract original_model from finetuned_model and save the result.
+
+        The original model's "diffusion_model." key patches are added to a
+        clone of the finetuned model via add_patches(..., -1.0, 1.0), the
+        clone is passed to calc_lora_model, and the resulting state dict is
+        written as a .safetensors file into the folder returned by
+        folder_paths.get_save_image_path.
+        """
+        dtype = {"fp8_e4m3fn": torch.float8_e4m3fn, "bf16": torch.bfloat16, "fp16": torch.float16, "fp16_fast": torch.float16, "fp32": torch.float32}[output_dtype]
+        m = finetuned_model.clone()
+        kp = original_model.get_key_patches("diffusion_model.")
+        for k in kp:
+            m.add_patches({k: kp[k]}, - 1.0, 1.0)
+        model_diff = m
+
+        lora_type = LORA_TYPES.get(lora_type)
+        full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir)
+
+        output_sd = {}
+        if model_diff is not None:
+            output_sd = calc_lora_model(model_diff, rank, "diffusion_model.", "diffusion_model.", output_sd, lora_type, algorithm, lowrank_iters, dtype, bias_diff=bias_diff)
+
+        output_checkpoint = f"{filename}_rank{rank}_{output_dtype}_{counter:05}_.safetensors"
+        output_checkpoint = os.path.join(full_output_folder, output_checkpoint)
+
+        comfy.utils.save_torch_file(output_sd, output_checkpoint, metadata=None)
+        return {}
+
+NODE_CLASS_MAPPINGS = {
+    "LoraExtractKJ": LoraExtractKJ
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {
+    "LoraExtractKJ": "LoraExtractKJ"
+}