From ab8cac5396db7caffb3e73ca171a3684c8daea2d Mon Sep 17 00:00:00 2001
From: kijai <40791699+kijai@users.noreply.github.com>
Date: Wed, 16 Jul 2025 14:02:31 +0300
Subject: [PATCH] Add LoraExtractKJ

Improved Lora extraction node
- build in diff substraction
- lowrank algo for quick extraction
- dtype selection
---
 __init__.py         |   4 ++
 nodes/lora_nodes.py | 154 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+)
 create mode 100644 nodes/lora_nodes.py

diff --git a/__init__.py b/__init__.py
index 677d6a3..3a950fc 100644
--- a/__init__.py
+++ b/__init__.py
@@ -6,6 +6,7 @@ from .nodes.image_nodes import *
 from .nodes.intrinsic_lora_nodes import *
 from .nodes.mask_nodes import *
 from .nodes.model_optimization_nodes import *
+from .nodes.lora_nodes import *
 NODE_CONFIG = {
     #constants
     "BOOLConstant": {"class": BOOLConstant, "name": "BOOL Constant"},
@@ -203,6 +204,9 @@ NODE_CONFIG = {
     "CreateInstanceDiffusionTracking": {"class": CreateInstanceDiffusionTracking},
     "AppendInstanceDiffusionTracking": {"class": AppendInstanceDiffusionTracking},
     "DrawInstanceDiffusionTracking": {"class": DrawInstanceDiffusionTracking},
+
+    #lora
+    "LoraExtractKJ": {"class": LoraExtractKJ, "name": "LoraExtractKJ"},
 }
 
 def generate_node_mappings(node_config):
diff --git a/nodes/lora_nodes.py b/nodes/lora_nodes.py
new file mode 100644
index 0000000..ab9fcfb
--- /dev/null
+++ b/nodes/lora_nodes.py
@@ -0,0 +1,154 @@
+import torch
+import comfy.model_management
+import comfy.utils
+import folder_paths
+import os
+import logging
+from enum import Enum
+from tqdm import tqdm
+
+CLAMP_QUANTILE = 0.99
+
+def extract_lora(diff, rank, algorithm, lowrank_iters=7):
+    conv2d = (len(diff.shape) == 4)
+    kernel_size = None if not conv2d else diff.size()[2:4]
+    conv2d_3x3 = conv2d and kernel_size != (1, 1)
+    out_dim, in_dim = diff.size()[0:2]
+    rank = min(rank, in_dim, out_dim)
+
+    if conv2d:
+        if conv2d_3x3:
+            diff = diff.flatten(start_dim=1)
+        else:
+            diff = diff.squeeze()
+
+    diff_float = diff.float()
+
+    if algorithm == "svd_lowrank":
+        U, S, V = torch.svd_lowrank(diff_float, q=rank, niter=lowrank_iters)
+        U = U @ torch.diag(S)
+        Vh = V.t()
+    else:
+        U, S, Vh = torch.linalg.svd(diff_float)
+        U = U[:, :rank]
+        S = S[:rank]
+        U = U @ torch.diag(S)
+        Vh = Vh[:rank, :]
+
+    dist = torch.cat([U.flatten(), Vh.flatten()])
+    hi_val = torch.quantile(dist, CLAMP_QUANTILE)
+    low_val = -hi_val
+
+    U = U.clamp(low_val, hi_val)
+    Vh = Vh.clamp(low_val, hi_val)
+    if conv2d:
+        U = U.reshape(out_dim, rank, 1, 1)
+        Vh = Vh.reshape(rank, in_dim, kernel_size[0], kernel_size[1])
+    return (U, Vh)
+
+class LORAType(Enum):
+    STANDARD = 0
+    FULL_DIFF = 1
+
+LORA_TYPES = {"standard": LORAType.STANDARD,
+              "full_diff": LORAType.FULL_DIFF}
+
+def calc_lora_model(model_diff, rank, prefix_model, prefix_lora, output_sd, lora_type, algorithm, lowrank_iters, out_dtype, bias_diff=False):
+    comfy.model_management.load_models_gpu([model_diff], force_patch_weights=True)
+    sd = model_diff.model_state_dict(filter_prefix=prefix_model)
+
+    # Get total number of keys to process for progress bar
+    total_keys = len([k for k in sd if k.endswith(".weight") or (bias_diff and k.endswith(".bias"))])
+    
+    # Create progress bar
+    progress_bar = tqdm(total=total_keys, desc=f"Extracting LoRA ({prefix_lora.strip('.')})")
+    comfy_pbar = comfy.utils.ProgressBar(total_keys)
+
+    for k in sd:
+        if k.endswith(".weight"):
+            weight_diff = sd[k]
+            if weight_diff.ndim == 5:
+                logging.info(f"Skipping 5D tensor for key {k}") #skip patch embed
+                progress_bar.update(1)
+                comfy_pbar.update(1)
+                continue
+            if lora_type == LORAType.STANDARD:
+                if weight_diff.ndim < 2:
+                    if bias_diff:
+                        output_sd["{}{}.diff".format(prefix_lora, k[len(prefix_model):-7])] = weight_diff.contiguous().to(out_dtype).cpu()
+                    progress_bar.update(1)
+                    comfy_pbar.update(1)
+                    continue
+                try:
+                    out = extract_lora(weight_diff, rank, algorithm, lowrank_iters)
+                    output_sd["{}{}.lora_up.weight".format(prefix_lora, k[len(prefix_model):-7])] = out[0].contiguous().to(out_dtype).cpu()
+                    output_sd["{}{}.lora_down.weight".format(prefix_lora, k[len(prefix_model):-7])] = out[1].contiguous().to(out_dtype).cpu()
+                except:
+                    logging.warning("Could not generate lora weights for key {}, is the weight difference a zero?".format(k))
+            elif lora_type == LORAType.FULL_DIFF:
+                output_sd["{}{}.diff".format(prefix_lora, k[len(prefix_model):-7])] = weight_diff.contiguous().to(out_dtype).cpu()
+            
+            progress_bar.update(1)
+            comfy_pbar.update(1)
+
+        elif bias_diff and k.endswith(".bias"):
+            output_sd["{}{}.diff_b".format(prefix_lora, k[len(prefix_model):-5])] = sd[k].contiguous().to(out_dtype).cpu()
+            progress_bar.update(1)
+            comfy_pbar.update(1)
+    progress_bar.close()
+    return output_sd
+
+class LoraExtractKJ:
+    def __init__(self):
+        self.output_dir = folder_paths.get_output_directory()
+
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": 
+                {
+                    "finetuned_model": ("MODEL",),
+                    "original_model": ("MODEL",),
+                    "filename_prefix": ("STRING", {"default": "loras/ComfyUI_extracted_lora"}),
+                    "rank": ("INT", {"default": 8, "min": 1, "max": 4096, "step": 1}),
+                    "lora_type": (tuple(LORA_TYPES.keys()),),
+                    "algorithm": (["svd_linalg", "svd_lowrank"], {"default": "svd", "tooltip": "SVD algorithm to use, svd_lowrank is faster but less accurate."}),
+                    "lowrank_iters": ("INT", {"default": 7, "min": 1, "max": 100, "step": 1, "tooltip": "The number of subspace iterations for lowrank SVD algorithm."}),
+                    "output_dtype": (["fp16", "bf16", "fp32"], {"default": "fp16"}),
+                    "bias_diff": ("BOOLEAN", {"default": True}),
+                },
+
+    }
+    RETURN_TYPES = ()
+    FUNCTION = "save"
+    OUTPUT_NODE = True
+
+    CATEGORY = "KJNodes/lora"
+
+    def save(self, finetuned_model, original_model, filename_prefix, rank, lora_type, algorithm, lowrank_iters, output_dtype, bias_diff):
+        dtype = {"fp8_e4m3fn": torch.float8_e4m3fn, "bf16": torch.bfloat16, "fp16": torch.float16, "fp16_fast": torch.float16, "fp32": torch.float32}[output_dtype]
+        m = finetuned_model.clone()
+        kp = original_model.get_key_patches("diffusion_model.")
+        for k in kp:
+            m.add_patches({k: kp[k]}, - 1.0, 1.0)
+        model_diff = m
+
+        lora_type = LORA_TYPES.get(lora_type)
+        full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir)
+
+        output_sd = {}
+        if model_diff is not None:
+            output_sd = calc_lora_model(model_diff, rank, "diffusion_model.", "diffusion_model.", output_sd, lora_type, algorithm, lowrank_iters, dtype, bias_diff=bias_diff)
+
+        output_checkpoint = f"{filename}_rank{rank}_{output_dtype}_{counter:05}_.safetensors"
+        output_checkpoint = os.path.join(full_output_folder, output_checkpoint)
+
+        comfy.utils.save_torch_file(output_sd, output_checkpoint, metadata=None)
+        return {}
+
+NODE_CLASS_MAPPINGS = {
+    "LoraExtractKJ": LoraExtractKJ
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {
+    "LoraExtractKJ": "LoraExtractKJ"
+}