Add GGUFLoaderKJ

GGUF loader that extends the ComfyUI-GGUF nodes; requires ComfyUI-GGUF to be installed.
Allows loading separate VACE GGUF modules
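
The node loads the main .gguf state dict, optionally loads a second .gguf file (such as a VACE module), merges it into the same state dict, and only then builds the model. A minimal standalone sketch of that flow, based on the added code below (load_merged_gguf is an illustrative helper name, not part of the commit):

    import importlib
    import folder_paths
    import comfy.sd

    def load_merged_gguf(model_name, extra_model_name="none"):
        # ComfyUI-GGUF is assumed to be installed and already loaded by ComfyUI,
        # so it can be looked up by its module name, as the node below does
        gguf_nodes = importlib.import_module("ComfyUI-GGUF")
        ops = gguf_nodes.ops.GGMLOps()
        # read the quantized weights of the main model
        sd = gguf_nodes.loader.gguf_sd_loader(folder_paths.get_full_path("unet", model_name))
        if extra_model_name != "none":
            # merge the extra module (e.g. VACE); overlapping keys come from the extra file
            sd.update(gguf_nodes.loader.gguf_sd_loader(folder_paths.get_full_path("unet", extra_model_name)))
        # build a ComfyUI diffusion model from the merged state dict, keeping GGML ops for dequantization
        return comfy.sd.load_diffusion_model_state_dict(sd, model_options={"custom_operations": ops})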
kijai 2025-09-17 17:50:18 +03:00
parent ffd4d1c908
commit 39d152e042
2 changed files with 109 additions and 4 deletions


@ -205,6 +205,7 @@ NODE_CONFIG = {
"CFGZeroStarAndInit": {"class": CFGZeroStarAndInit, "name": "CFG Zero Star/Init"},
"ModelPatchTorchSettings": {"class": ModelPatchTorchSettings, "name": "Model Patch Torch Settings"},
"WanVideoNAG": {"class": WanVideoNAG, "name": "WanVideoNAG"},
"GGUFLoaderKJ": {"class": GGUFLoaderKJ, "name": "GGUF Loader KJ"},
#instance diffusion
"CreateInstanceDiffusionTracking": {"class": CreateInstanceDiffusionTracking},


@ -8,7 +8,8 @@ import folder_paths
import comfy.model_management as mm
from comfy.cli_args import args
from typing import Optional, Tuple
import importlib
from comfy_api.latest import io
sageattn_modes = ["disabled", "auto", "sageattn_qk_int8_pv_fp16_cuda", "sageattn_qk_int8_pv_fp16_triton", "sageattn_qk_int8_pv_fp8_cuda", "sageattn_qk_int8_pv_fp8_cuda++"]
@ -239,7 +240,7 @@ class CheckpointLoaderKJ(BaseLoaderKJ):
"compute_dtype": (["default", "fp16", "bf16", "fp32"], {"default": "default", "tooltip": "The compute dtype to use for the model."}),
"patch_cublaslinear": ("BOOLEAN", {"default": False, "tooltip": "Enable or disable the patching, won't take effect on already loaded models!"}),
"sage_attention": (sageattn_modes, {"default": False, "tooltip": "Patch comfy attention to use sageattn."}),
"enable_fp16_accumulation": ("BOOLEAN", {"default": False, "tooltip": "Enable torch.backends.cuda.matmul.allow_fp16_accumulation, requires pytorch 2.7.0 nightly."}),
"enable_fp16_accumulation": ("BOOLEAN", {"default": False, "tooltip": "Enable torch.backends.cuda.matmul.allow_fp16_accumulation, required minimum pytorch version 2.7.1"}),
}}
RETURN_TYPES = ("MODEL", "CLIP", "VAE")
@ -285,7 +286,7 @@ class CheckpointLoaderKJ(BaseLoaderKJ):
if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"):
torch.backends.cuda.matmul.allow_fp16_accumulation = True
else:
raise RuntimeError("Failed to set fp16 accumulation, this requires pytorch 2.7.0 nightly currently")
raise RuntimeError("Failed to set fp16 accumulation, requires pytorch version 2.7.1 or higher")
else:
if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"):
torch.backends.cuda.matmul.allow_fp16_accumulation = False
@ -1910,4 +1911,107 @@ class CFGZeroStarAndInit:
m = model.clone()
m.set_model_sampler_cfg_function(cfg_zerostar)
return (m, )
class GGUFLoaderKJ(io.ComfyNode):
    gguf_nodes = None

    @classmethod
    def _load_gguf_nodes(cls):
        # ComfyUI-GGUF is a separate custom node pack; it is resolved lazily by the
        # module name ComfyUI assigns to the custom node folder.
        if cls.gguf_nodes is None:
            try:
                cls.gguf_nodes = importlib.import_module("ComfyUI-GGUF")
            except ImportError:
                try:
                    cls.gguf_nodes = importlib.import_module("comfyui-gguf")
                except ImportError:
                    raise ImportError("This node requires ComfyUI-GGUF to be installed.")
        return cls.gguf_nodes

    @classmethod
    def define_schema(cls):
        return io.Schema(
            node_id="GGUFLoaderKJ",
            category="KJNodes/experimental",
            is_experimental=True,
            inputs=[
                io.Combo.Input("model_name", options=folder_paths.get_filename_list("unet_gguf")),
                io.Combo.Input("extra_model_name", options=folder_paths.get_filename_list("unet_gguf") + ["none"], default="none", tooltip="An extra GGUF model to load and merge into the main model, for example a VACE module."),
                io.Combo.Input("dequant_dtype", options=["default", "target", "float32", "float16", "bfloat16"], default="default"),
                io.Combo.Input("patch_dtype", options=["default", "target", "float32", "float16", "bfloat16"], default="default"),
                io.Boolean.Input("patch_on_device", default=False),
                io.Boolean.Input("enable_fp16_accumulation", default=False, tooltip="Enable torch.backends.cuda.matmul.allow_fp16_accumulation, requires pytorch 2.7.1 or newer."),
                io.Combo.Input("attention_override", options=["none", "sdpa", "sageattn", "xformers", "flashattn"], default="none", tooltip="Overrides the attention implementation used, requires the respective library to be installed."),
            ],
            outputs=[io.Model.Output(),],
        )

    # These wrappers are stored in transformer_options and called in place of the default
    # optimized attention; each dispatches to the selected comfy attention backend.
    def attention_override_pytorch(func, *args, **kwargs):
        new_attention = comfy.ldm.modules.attention.attention_pytorch
        return new_attention.__wrapped__(*args, **kwargs)

    def attention_override_sage(func, *args, **kwargs):
        new_attention = comfy.ldm.modules.attention.attention_sage
        return new_attention.__wrapped__(*args, **kwargs)

    def attention_override_xformers(func, *args, **kwargs):
        new_attention = comfy.ldm.modules.attention.attention_xformers
        return new_attention.__wrapped__(*args, **kwargs)

    def attention_override_flash(func, *args, **kwargs):
        new_attention = comfy.ldm.modules.attention.attention_flash
        return new_attention.__wrapped__(*args, **kwargs)

    ATTENTION_OVERRIDES = {
        "sdpa": attention_override_pytorch,
        "sageattn": attention_override_sage,
        "xformers": attention_override_xformers,
        "flashattn": attention_override_flash,
    }

    @classmethod
    def execute(cls, model_name, extra_model_name, dequant_dtype, patch_dtype, patch_on_device, attention_override, enable_fp16_accumulation):
        gguf_nodes = cls._load_gguf_nodes()

        ops = gguf_nodes.ops.GGMLOps()

        def set_linear_dtype(attr, value):
            if value == "default":
                setattr(ops.Linear, attr, None)
            elif value == "target":
                setattr(ops.Linear, attr, value)
            else:
                setattr(ops.Linear, attr, getattr(torch, value))

        set_linear_dtype("dequant_dtype", dequant_dtype)
        set_linear_dtype("patch_dtype", patch_dtype)

        # init model
        model_path = folder_paths.get_full_path("unet", model_name)
        sd = gguf_nodes.loader.gguf_sd_loader(model_path)

        if extra_model_name is not None and extra_model_name != "none":
            if not extra_model_name.endswith(".gguf"):
                raise ValueError("Extra model must also be a .gguf file")
            extra_model_full_path = folder_paths.get_full_path("unet", extra_model_name)
            extra_model = gguf_nodes.loader.gguf_sd_loader(extra_model_full_path)
            sd.update(extra_model)

        model = comfy.sd.load_diffusion_model_state_dict(
            sd, model_options={"custom_operations": ops}
        )
        if model is None:
            raise RuntimeError(f"ERROR: Could not detect model type of: {model_path}")
        model = gguf_nodes.nodes.GGUFModelPatcher.clone(model)
        model.patch_on_device = patch_on_device

        # attention override
        if attention_override in cls.ATTENTION_OVERRIDES:
            model.model_options["transformer_options"]["optimized_attention_override"] = cls.ATTENTION_OVERRIDES[attention_override]

        if enable_fp16_accumulation:
            if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"):
                torch.backends.cuda.matmul.allow_fp16_accumulation = True
            else:
                raise RuntimeError("Failed to set fp16 accumulation, requires pytorch version 2.7.1 or higher")
        else:
            if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"):
                torch.backends.cuda.matmul.allow_fp16_accumulation = False

        return io.NodeOutput(model,)
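
Because execute is a classmethod, the loader can also be exercised directly for quick testing outside the graph; a hypothetical call, with placeholder file names standing in for whatever GGUF files are present in the models/unet folder:

    out = GGUFLoaderKJ.execute(
        model_name="wan2.1_t2v_14B_Q4_K_M.gguf",            # placeholder
        extra_model_name="wan2.1_vace_module_Q4_K_M.gguf",  # placeholder, or "none"
        dequant_dtype="default",
        patch_dtype="default",
        patch_on_device=False,
        attention_override="none",
        enable_fp16_accumulation=False,
    )  # returns io.NodeOutput wrapping the patched model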