From 9f7cf610dab957d4b4ff3a2aa1da7cf3b0b908a1 Mon Sep 17 00:00:00 2001
From: kijai <40791699+kijai@users.noreply.github.com>
Date: Thu, 18 Sep 2025 18:15:35 +0300
Subject: [PATCH] Update model_optimization_nodes.py

---
 nodes/model_optimization_nodes.py | 203 ++++++++++++++++----------
 1 file changed, 106 insertions(+), 97 deletions(-)

diff --git a/nodes/model_optimization_nodes.py b/nodes/model_optimization_nodes.py
index c959c64..9c1ddf2 100644
--- a/nodes/model_optimization_nodes.py
+++ b/nodes/model_optimization_nodes.py
@@ -9,7 +9,12 @@ import comfy.model_management as mm
 from comfy.cli_args import args
 from typing import Optional, Tuple
 import importlib
-from comfy_api.latest import io
+try:
+    from comfy_api.latest import io
+    v3_available = True
+except ImportError:
+    v3_available = False
+    logging.warning("ComfyUI v3 node API not available, please update ComfyUI to access latest v3 nodes.")
 
 sageattn_modes = ["disabled", "auto", "sageattn_qk_int8_pv_fp16_cuda", "sageattn_qk_int8_pv_fp16_triton", "sageattn_qk_int8_pv_fp8_cuda", "sageattn_qk_int8_pv_fp8_cuda++"]
 
@@ -1913,106 +1918,110 @@ class CFGZeroStarAndInit:
         m.set_model_sampler_cfg_function(cfg_zerostar)
         return (m, )
 
+if v3_available:
+    class GGUFLoaderKJ(io.ComfyNode):
+        gguf_nodes = None
 
-class GGUFLoaderKJ(io.ComfyNode):
-    @classmethod
-    def __init__(cls):
-        try:
-            cls.gguf_nodes = importlib.import_module("ComfyUI-GGUF")
-        except ImportError:
-            try:
-                cls.gguf_nodes = importlib.import_module("comfyui-gguf")
-            except ImportError:
+        @classmethod
+        def _ensure_gguf_loaded(cls):
+            if cls.gguf_nodes is None:
+                try:
+                    cls.gguf_nodes = importlib.import_module("ComfyUI-GGUF")
+                except ImportError:
+                    try:
+                        cls.gguf_nodes = importlib.import_module("comfyui-gguf")
+                    except ImportError:
+                        raise ImportError("This node requires ComfyUI-GGUF to be installed.")
+
+        @classmethod
+        def define_schema(cls):
+            cls._ensure_gguf_loaded()
+            return io.Schema(
+                node_id="GGUFLoaderKJ",
+                category="KJNodes/experimental",
+                description="Loads a GGUF model with advanced options, requires [ComfyUI-GGUF](https://github.com/city96/ComfyUI-GGUF) to be installed.",
+                is_experimental=True,
+                inputs=[
+                    io.Combo.Input("model_name", options=[x for x in folder_paths.get_filename_list("unet_gguf")]),
+                    io.Combo.Input("extra_model_name", options=[x for x in folder_paths.get_filename_list("unet_gguf")] + ["none"], default="none", tooltip="An extra gguf model to load and merge into the main model, for example VACE module"),
+                    io.Combo.Input("dequant_dtype", options=["default", "target", "float32", "float16", "bfloat16"], default="default"),
+                    io.Combo.Input("patch_dtype", options=["default", "target", "float32", "float16", "bfloat16"], default="default"),
+                    io.Boolean.Input("patch_on_device", default=False),
+                    io.Boolean.Input("enable_fp16_accumulation", default=False, tooltip="Enable torch.backends.cuda.matmul.allow_fp16_accumulation, required minimum pytorch version 2.7.1"),
+                    io.Combo.Input("attention_override", options=["none", "sdpa", "sageattn", "xformers", "flashattn"], default="none", tooltip="Overrides the used attention implementation, requires the respective library to be installed"),
+
+                ],
+                outputs=[io.Model.Output(),],
+            )
+
+        def attention_override_pytorch(func, *args, **kwargs):
+            new_attention = comfy.ldm.modules.attention.attention_pytorch
+            return new_attention.__wrapped__(*args, **kwargs)
+        def attention_override_sage(func, *args, **kwargs):
+            new_attention = comfy.ldm.modules.attention.attention_sage
+            return new_attention.__wrapped__(*args, **kwargs)
+        def attention_override_xformers(func, *args, **kwargs):
+            new_attention = comfy.ldm.modules.attention.attention_xformers
+            return new_attention.__wrapped__(*args, **kwargs)
+        def attention_override_flash(func, *args, **kwargs):
+            new_attention = comfy.ldm.modules.attention.attention_flash
+            return new_attention.__wrapped__(*args, **kwargs)
+
+        ATTENTION_OVERRIDES = {
+            "sdpa": attention_override_pytorch,
+            "sageattn": attention_override_sage,
+            "xformers": attention_override_xformers,
+            "flashattn": attention_override_flash,
+        }
+
+        @classmethod
+        def execute(cls, model_name, extra_model_name, dequant_dtype, patch_dtype, patch_on_device, attention_override, enable_fp16_accumulation):
+            if cls.gguf_nodes is None:
                 raise ImportError("This node requires ComfyUI-GGUF to be installed.")
+            ops = cls.gguf_nodes.ops.GGMLOps()
 
-    @classmethod
-    def define_schema(cls):
-        return io.Schema(
-            node_id="GGUFLoaderKJ",
-            category="KJNodes/experimental",
-            description="Loads a GGUF model with advanced options, requires [ComfyUI-GGUF](https://github.com/city96/ComfyUI-GGUF) to be installed.",
-            is_experimental=True,
-            inputs=[
-                io.Combo.Input("model_name", options=[x for x in folder_paths.get_filename_list("unet_gguf")]),
-                io.Combo.Input("extra_model_name", options=[x for x in folder_paths.get_filename_list("unet_gguf")] + ["none"], default="none", tooltip="An extra gguf model to load and merge into the main model, for example VACE module"),
-                io.Combo.Input("dequant_dtype", options=["default", "target", "float32", "float16", "bfloat16"], default="default"),
-                io.Combo.Input("patch_dtype", options=["default", "target", "float32", "float16", "bfloat16"], default="default"),
-                io.Boolean.Input("patch_on_device", default=False),
-                io.Boolean.Input("enable_fp16_accumulation", default=False, tooltip="Enable torch.backends.cuda.matmul.allow_fp16_accumulation, required minimum pytorch version 2.7.1"),
-                io.Combo.Input("attention_override", options=["none", "sdpa", "sageattn", "xformers", "flashattn"], default="none", tooltip="Overrides the used attention implementation, requires the respective library to be installed"),
+            def set_linear_dtype(attr, value):
+                if value == "default":
+                    setattr(ops.Linear, attr, None)
+                elif value == "target":
+                    setattr(ops.Linear, attr, value)
+                else:
+                    setattr(ops.Linear, attr, getattr(torch, value))
 
-            ],
-            outputs=[io.Model.Output(),],
-        )
-
-    def attention_override_pytorch(func, *args, **kwargs):
-        new_attention = comfy.ldm.modules.attention.attention_pytorch
-        return new_attention.__wrapped__(*args, **kwargs)
-    def attention_override_sage(func, *args, **kwargs):
-        new_attention = comfy.ldm.modules.attention.attention_sage
-        return new_attention.__wrapped__(*args, **kwargs)
-    def attention_override_xformers(func, *args, **kwargs):
-        new_attention = comfy.ldm.modules.attention.attention_xformers
-        return new_attention.__wrapped__(*args, **kwargs)
-    def attention_override_flash(func, *args, **kwargs):
-        new_attention = comfy.ldm.modules.attention.attention_flash
-        return new_attention.__wrapped__(*args, **kwargs)
-
-    ATTENTION_OVERRIDES = {
-        "sdpa": attention_override_pytorch,
-        "sageattn": attention_override_sage,
-        "xformers": attention_override_xformers,
-        "flashattn": attention_override_flash,
-    }
-
-    @classmethod
-    def execute(cls, model_name, extra_model_name, dequant_dtype, patch_dtype, patch_on_device, attention_override, enable_fp16_accumulation):
-        if cls.gguf_nodes is None:
-            raise ImportError("This node requires ComfyUI-GGUF to be installed.")
-        ops = cls.gguf_nodes.ops.GGMLOps()
+            set_linear_dtype("dequant_dtype", dequant_dtype)
+            set_linear_dtype("patch_dtype", patch_dtype)
 
-        def set_linear_dtype(attr, value):
-            if value == "default":
-                setattr(ops.Linear, attr, None)
-            elif value == "target":
-                setattr(ops.Linear, attr, value)
+            # init model
+            model_path = folder_paths.get_full_path("unet", model_name)
+            sd = cls.gguf_nodes.loader.gguf_sd_loader(model_path)
+
+            if extra_model_name is not None and extra_model_name != "none":
+                if not extra_model_name.endswith(".gguf"):
+                    raise ValueError("Extra model must also be a .gguf file")
+                extra_model_full_path = folder_paths.get_full_path("unet", extra_model_name)
+                extra_model = cls.gguf_nodes.loader.gguf_sd_loader(extra_model_full_path)
+                sd.update(extra_model)
+
+            model = comfy.sd.load_diffusion_model_state_dict(
+                sd, model_options={"custom_operations": ops}
+            )
+            if model is None:
+                raise RuntimeError(f"ERROR: Could not detect model type of: {model_path}")
+
+            model = cls.gguf_nodes.nodes.GGUFModelPatcher.clone(model)
+            model.patch_on_device = patch_on_device
+
+            # attention override
+            if attention_override in cls.ATTENTION_OVERRIDES:
+                model.model_options["transformer_options"]["optimized_attention_override"] = cls.ATTENTION_OVERRIDES[attention_override]
+
+            if enable_fp16_accumulation:
+                if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"):
+                    torch.backends.cuda.matmul.allow_fp16_accumulation = True
+                else:
+                    raise RuntimeError("Failed to set fp16 accumulation, requires pytorch version 2.7.1 or higher")
             else:
-                setattr(ops.Linear, attr, getattr(torch, value))
+                if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"):
+                    torch.backends.cuda.matmul.allow_fp16_accumulation = False
 
-        set_linear_dtype("dequant_dtype", dequant_dtype)
-        set_linear_dtype("patch_dtype", patch_dtype)
-
-        # init model
-        model_path = folder_paths.get_full_path("unet", model_name)
-        sd = cls.gguf_nodes.loader.gguf_sd_loader(model_path)
-
-        if extra_model_name is not None and extra_model_name != "none":
-            if not extra_model_name.endswith(".gguf"):
-                raise ValueError("Extra model must also be a .gguf file")
-            extra_model_full_path = folder_paths.get_full_path("unet", extra_model_name)
-            extra_model = cls.gguf_nodes.loader.gguf_sd_loader(extra_model_full_path)
-            sd.update(extra_model)
-
-        model = comfy.sd.load_diffusion_model_state_dict(
-            sd, model_options={"custom_operations": ops}
-        )
-        if model is None:
-            raise RuntimeError(f"ERROR: Could not detect model type of: {model_path}")
-
-        model = cls.gguf_nodes.nodes.GGUFModelPatcher.clone(model)
-        model.patch_on_device = patch_on_device
-
-        # attention override
-        if attention_override in cls.ATTENTION_OVERRIDES:
-            model.model_options["transformer_options"]["optimized_attention_override"] = cls.ATTENTION_OVERRIDES[attention_override]
-
-        if enable_fp16_accumulation:
-            if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"):
-                torch.backends.cuda.matmul.allow_fp16_accumulation = True
-            else:
-                raise RuntimeError("Failed to set fp16 accumulation, requires pytorch version 2.7.1 or higher")
-        else:
-            if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"):
-                torch.backends.cuda.matmul.allow_fp16_accumulation = False
-
-        return io.NodeOutput(model,)
\ No newline at end of file
+            return io.NodeOutput(model,)
\ No newline at end of file
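
Note on the attention_override hook (not part of the patch): judging from the signatures above, the callable stored under optimized_attention_override appears to receive the attention implementation ComfyUI selected as its first argument, followed by that function's original arguments; the overrides in the patch ignore the first argument and dispatch to a different backend through its __wrapped__ attribute. A minimal sketch of a custom entry following the same calling convention; my_attention_override is a hypothetical name and this is an illustration, not ComfyUI's documented API:

    import comfy.ldm.modules.attention as attn

    def my_attention_override(func, *args, **kwargs):
        # func is the attention function ComfyUI would otherwise have used;
        # re-dispatch to the plain PyTorch SDPA path instead. attention_pytorch
        # is decorated in ComfyUI, so the raw callable is reached through
        # __wrapped__, the same way the patch's overrides do.
        return attn.attention_pytorch.__wrapped__(*args, **kwargs)

    # model.model_options["transformer_options"]["optimized_attention_override"] = my_attention_override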