From cedea479027303837fd4022fa63b498b0087d766 Mon Sep 17 00:00:00 2001
From: Dango233
Date: Tue, 28 Oct 2025 22:28:43 -0400
Subject: [PATCH 1/3] Fix LoRA extraction for scaled fp8 models

---
 nodes/lora_nodes.py | 92 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 78 insertions(+), 14 deletions(-)

diff --git a/nodes/lora_nodes.py b/nodes/lora_nodes.py
index 926c6a3..62ea0dd 100644
--- a/nodes/lora_nodes.py
+++ b/nodes/lora_nodes.py
@@ -1,6 +1,7 @@
 import torch
 import comfy.model_management
 import comfy.utils
+import comfy.lora
 import folder_paths
 import os
 import logging
@@ -11,6 +12,50 @@
 device = comfy.model_management.get_torch_device()

 CLAMP_QUANTILE = 0.99
+
+def _resolve_weight_from_patches(patches, key):
+    base_weight, convert_func = patches[0]
+    weight_tensor = comfy.model_management.cast_to_device(
+        base_weight, torch.device("cpu"), torch.float32, copy=True
+    )
+    try:
+        weight_tensor = convert_func(weight_tensor, inplace=True)
+    except TypeError:
+        weight_tensor = convert_func(weight_tensor)
+
+    if len(patches) > 1:
+        weight_tensor = comfy.lora.calculate_weight(
+            patches[1:],
+            weight_tensor,
+            key,
+            intermediate_dtype=torch.float32,
+            original_weights={key: patches},
+        )
+
+    return weight_tensor
+
+
+def _build_scaled_fp8_diff(finetuned_model, original_model, prefix, bias_diff):
+    finetuned_patches = finetuned_model.get_key_patches(prefix)
+    original_patches = original_model.get_key_patches(prefix)
+
+    common_keys = set(finetuned_patches.keys()).intersection(original_patches.keys())
+    diff_sd = {}
+
+    for key in common_keys:
+        is_weight = key.endswith(".weight")
+        is_bias = key.endswith(".bias")
+
+        if not is_weight and not (bias_diff and is_bias):
+            continue
+
+        ft_tensor = _resolve_weight_from_patches(finetuned_patches[key], key)
+        orig_tensor = _resolve_weight_from_patches(original_patches[key], key)
+
+        diff_sd[key] = ft_tensor.sub(orig_tensor)
+
+    return diff_sd
+
 def extract_lora(diff, key, rank, algorithm, lora_type, lowrank_iters=7, adaptive_param=1.0, clamp_quantile=True):
     """
     Extracts LoRA weights from a weight difference tensor using SVD.
@@ -99,15 +144,18 @@ def extract_lora(diff, key, rank, algorithm, lora_type, lowrank_iters=7, adaptiv
     return (U, Vh)


-def calc_lora_model(model_diff, rank, prefix_model, prefix_lora, output_sd, lora_type, algorithm, lowrank_iters, out_dtype, bias_diff=False, adaptive_param=1.0, clamp_quantile=True):
-    comfy.model_management.load_models_gpu([model_diff], force_patch_weights=True)
-    model_diff.model.diffusion_model.cpu()
-    sd = model_diff.model_state_dict(filter_prefix=prefix_model)
-    del model_diff
-    comfy.model_management.soft_empty_cache()
-    for k, v in sd.items():
-        if isinstance(v, torch.Tensor):
-            sd[k] = v.cpu()
+def calc_lora_model(model_diff, rank, prefix_model, prefix_lora, output_sd, lora_type, algorithm, lowrank_iters, out_dtype, bias_diff=False, adaptive_param=1.0, clamp_quantile=True, sd_override=None):
+    if sd_override is None:
+        comfy.model_management.load_models_gpu([model_diff], force_patch_weights=True)
+        model_diff.model.diffusion_model.cpu()
+        sd = model_diff.model_state_dict(filter_prefix=prefix_model)
+        del model_diff
+        comfy.model_management.soft_empty_cache()
+        for k, v in sd.items():
+            if isinstance(v, torch.Tensor):
+                sd[k] = v.cpu()
+    else:
+        sd = sd_override

     # Get total number of keys to process for progress bar
     total_keys = len([k for k in sd if k.endswith(".weight") or (bias_diff and k.endswith(".bias"))])
@@ -183,17 +231,33 @@ class LoraExtractKJ:
             raise ValueError("svd_lowrank algorithm is only supported for standard LoRA extraction.")

         dtype = {"fp8_e4m3fn": torch.float8_e4m3fn, "bf16": torch.bfloat16, "fp16": torch.float16, "fp16_fast": torch.float16, "fp32": torch.float32}[output_dtype]
-        m = finetuned_model.clone()
-        kp = original_model.get_key_patches("diffusion_model.")
-        for k in kp:
-            m.add_patches({k: kp[k]}, - 1.0, 1.0)
-        model_diff = m
+
+        model_diff = None
+        sd_override = None
+
+        scaled_fp8_ft = getattr(getattr(finetuned_model.model, "model_config", None), "scaled_fp8", None)
+        scaled_fp8_orig = getattr(getattr(original_model.model, "model_config", None), "scaled_fp8", None)
+
+        if scaled_fp8_ft is not None and scaled_fp8_orig is not None:
+            comfy.model_management.load_models_gpu([finetuned_model, original_model], force_patch_weights=True)
+            sd_override = _build_scaled_fp8_diff(
+                finetuned_model, original_model, "diffusion_model.", bias_diff
+            )
+            comfy.model_management.soft_empty_cache()
+        else:
+            m = finetuned_model.clone()
+            kp = original_model.get_key_patches("diffusion_model.")
+            for k in kp:
+                m.add_patches({k: kp[k]}, - 1.0, 1.0)
+            model_diff = m

         full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir)

         output_sd = {}
         if model_diff is not None:
             output_sd = calc_lora_model(model_diff, rank, "diffusion_model.", "diffusion_model.", output_sd, lora_type, algorithm, lowrank_iters, dtype, bias_diff=bias_diff, adaptive_param=adaptive_param, clamp_quantile=clamp_quantile)
+        elif sd_override is not None:
+            output_sd = calc_lora_model(None, rank, "diffusion_model.", "diffusion_model.", output_sd, lora_type, algorithm, lowrank_iters, dtype, bias_diff=bias_diff, adaptive_param=adaptive_param, clamp_quantile=clamp_quantile, sd_override=sd_override)
         if "adaptive" in lora_type:
             rank_str = f"{lora_type}_{adaptive_param:.2f}"
         else:

From e6ee59b4c2cd9eb95a8647951beefc69cff10349 Mon Sep 17 00:00:00 2001
From: Dango233
Date: Tue, 28 Oct 2025 22:30:26 -0400
Subject: [PATCH 2/3] Log when scaled fp8 diff path is used

---
 nodes/lora_nodes.py | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/nodes/lora_nodes.py b/nodes/lora_nodes.py
index 62ea0dd..543757f 100644
--- a/nodes/lora_nodes.py
+++ b/nodes/lora_nodes.py
@@ -240,6 +240,9 @@ class LoraExtractKJ:

         if scaled_fp8_ft is not None and scaled_fp8_orig is not None:
             comfy.model_management.load_models_gpu([finetuned_model, original_model], force_patch_weights=True)
+            logging.info(
+                "LoraExtractKJ: detected scaled fp8 weights on both models; using high-precision diff path."
+            )
             sd_override = _build_scaled_fp8_diff(
                 finetuned_model, original_model, "diffusion_model.", bias_diff
             )

From 8643d75a6b98dfd1f39eb97ea53e1c927314200a Mon Sep 17 00:00:00 2001
From: Dango233
Date: Tue, 28 Oct 2025 22:40:05 -0400
Subject: [PATCH 3/3] Extend fp8 diff path when either model is scaled

---
 nodes/lora_nodes.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/nodes/lora_nodes.py b/nodes/lora_nodes.py
index 543757f..6b60cfc 100644
--- a/nodes/lora_nodes.py
+++ b/nodes/lora_nodes.py
@@ -237,11 +237,14 @@ class LoraExtractKJ:

         scaled_fp8_ft = getattr(getattr(finetuned_model.model, "model_config", None), "scaled_fp8", None)
         scaled_fp8_orig = getattr(getattr(original_model.model, "model_config", None), "scaled_fp8", None)
+        scaled_fp8_present = scaled_fp8_ft is not None or scaled_fp8_orig is not None

-        if scaled_fp8_ft is not None and scaled_fp8_orig is not None:
+        if scaled_fp8_present:
             comfy.model_management.load_models_gpu([finetuned_model, original_model], force_patch_weights=True)
             logging.info(
-                "LoraExtractKJ: detected scaled fp8 weights on both models; using high-precision diff path."
+                "LoraExtractKJ: detected scaled fp8 weights (finetuned=%s, original=%s); using high-precision diff path.",
+                scaled_fp8_ft is not None,
+                scaled_fp8_orig is not None,
             )
             sd_override = _build_scaled_fp8_diff(
                 finetuned_model, original_model, "diffusion_model.", bias_diff
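
Reviewer note (not part of the patch series): a minimal standalone sketch of why the sd_override path dequantizes before diffing. With scaled fp8, each tensor carries its own scale, so subtracting raw fp8 payloads mixes incompatible units; the patch decodes each side to fp32 first (via convert_func / comfy.lora.calculate_weight in _resolve_weight_from_patches) and only then subtracts. The per-tensor scale scheme and the helpers make_scaled_fp8 / dequant below are simplified, hypothetical stand-ins for ComfyUI's actual scaled_fp8 handling, and the script assumes a PyTorch build that supports torch.float8_e4m3fn.

    import torch

    # Toy stand-in for a "scaled fp8" weight: an fp8 payload plus a per-tensor scale.
    # (Simplified assumption for illustration only, not ComfyUI's real layout.)
    def make_scaled_fp8(w):
        scale = w.abs().max() / 448.0  # 448 = largest normal value of float8_e4m3fn
        return (w / scale).to(torch.float8_e4m3fn), scale

    def dequant(payload, scale):
        return payload.to(torch.float32) * scale

    w_orig = torch.randn(4, 4)
    w_ft = w_orig + 0.05 * torch.randn(4, 4)  # pretend finetuned weights

    q_orig, s_orig = make_scaled_fp8(w_orig)
    q_ft, s_ft = make_scaled_fp8(w_ft)

    true_diff = w_ft - w_orig

    # Naive: subtract the raw fp8 payloads; each tensor has its own scale, so this is wrong.
    naive_diff = q_ft.to(torch.float32) - q_orig.to(torch.float32)

    # Dequantize-first, the order the patch's helpers aim for: decode each side to fp32,
    # then subtract.
    decoded_diff = dequant(q_ft, s_ft) - dequant(q_orig, s_orig)

    print("naive max error:  ", (naive_diff - true_diff).abs().max().item())
    print("decoded max error:", (decoded_diff - true_diff).abs().max().item())

The naive error is orders of magnitude larger because the two payloads are expressed in different per-tensor units, which is exactly the failure mode the high-precision diff path avoids.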