Add GGUFLoaderKJ

GGUF loader that extends the ComfyUI-GGUF nodes; requires ComfyUI-GGUF to be installed.
Allows loading separate VACE GGUF modules
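
The node loads the main .gguf state dict, optionally loads a second .gguf file (such as a VACE module), merges it into the same state dict, and only then builds the model. A minimal standalone sketch of that flow, based on the added code below (load_merged_gguf is an illustrative helper name, not part of the commit):

    import importlib
    import folder_paths
    import comfy.sd

    def load_merged_gguf(model_name, extra_model_name="none"):
        # ComfyUI-GGUF is assumed to be installed and already loaded by ComfyUI,
        # so it can be looked up by its module name, as the node below does
        gguf_nodes = importlib.import_module("ComfyUI-GGUF")
        ops = gguf_nodes.ops.GGMLOps()
        # read the quantized weights of the main model
        sd = gguf_nodes.loader.gguf_sd_loader(folder_paths.get_full_path("unet", model_name))
        if extra_model_name != "none":
            # merge the extra module (e.g. VACE); overlapping keys come from the extra file
            sd.update(gguf_nodes.loader.gguf_sd_loader(folder_paths.get_full_path("unet", extra_model_name)))
        # build a ComfyUI diffusion model from the merged state dict, keeping GGML ops for dequantization
        return comfy.sd.load_diffusion_model_state_dict(sd, model_options={"custom_operations": ops})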
kijai 2025-09-17 17:50:18 +03:00
parent ffd4d1c908
commit 39d152e042
2 changed files with 109 additions and 4 deletions


@ -205,6 +205,7 @@ NODE_CONFIG = {
"CFGZeroStarAndInit": {"class": CFGZeroStarAndInit, "name": "CFG Zero Star/Init"},
"ModelPatchTorchSettings": {"class": ModelPatchTorchSettings, "name": "Model Patch Torch Settings"},
"WanVideoNAG": {"class": WanVideoNAG, "name": "WanVideoNAG"},
"GGUFLoaderKJ": {"class": GGUFLoaderKJ, "name": "GGUF Loader KJ"},
#instance diffusion
"CreateInstanceDiffusionTracking": {"class": CreateInstanceDiffusionTracking},


@ -8,7 +8,8 @@ import folder_paths
import comfy.model_management as mm
from comfy.cli_args import args
from typing import Optional, Tuple
import importlib
from comfy_api.latest import io
sageattn_modes = ["disabled", "auto", "sageattn_qk_int8_pv_fp16_cuda", "sageattn_qk_int8_pv_fp16_triton", "sageattn_qk_int8_pv_fp8_cuda", "sageattn_qk_int8_pv_fp8_cuda++"]
@ -239,7 +240,7 @@ class CheckpointLoaderKJ(BaseLoaderKJ):
"compute_dtype": (["default", "fp16", "bf16", "fp32"], {"default": "default", "tooltip": "The compute dtype to use for the model."}),
"patch_cublaslinear": ("BOOLEAN", {"default": False, "tooltip": "Enable or disable the patching, won't take effect on already loaded models!"}),
"sage_attention": (sageattn_modes, {"default": False, "tooltip": "Patch comfy attention to use sageattn."}),
"enable_fp16_accumulation": ("BOOLEAN", {"default": False, "tooltip": "Enable torch.backends.cuda.matmul.allow_fp16_accumulation, requires pytorch 2.7.0 nightly."}),
"enable_fp16_accumulation": ("BOOLEAN", {"default": False, "tooltip": "Enable torch.backends.cuda.matmul.allow_fp16_accumulation, required minimum pytorch version 2.7.1"}),
}}
RETURN_TYPES = ("MODEL", "CLIP", "VAE")
@ -285,7 +286,7 @@ class CheckpointLoaderKJ(BaseLoaderKJ):
if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"):
torch.backends.cuda.matmul.allow_fp16_accumulation = True
else:
raise RuntimeError("Failed to set fp16 accumulation, this requires pytorch 2.7.0 nightly currently")
raise RuntimeError("Failed to set fp16 accumulation, requires pytorch version 2.7.1 or higher")
else:
if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"):
torch.backends.cuda.matmul.allow_fp16_accumulation = False
@ -1910,4 +1911,107 @@ class CFGZeroStarAndInit:
m = model.clone()
m.set_model_sampler_cfg_function(cfg_zerostar)
return (m, )
class GGUFLoaderKJ(io.ComfyNode):
    gguf_nodes = None

    @classmethod
    def _load_gguf_nodes(cls):
        # ComfyUI-GGUF is a separate custom node pack; it is resolved lazily by the
        # module name ComfyUI assigns to the custom node folder.
        if cls.gguf_nodes is None:
            try:
                cls.gguf_nodes = importlib.import_module("ComfyUI-GGUF")
            except ImportError:
                try:
                    cls.gguf_nodes = importlib.import_module("comfyui-gguf")
                except ImportError:
                    raise ImportError("This node requires ComfyUI-GGUF to be installed.")
        return cls.gguf_nodes

    @classmethod
    def define_schema(cls):
        return io.Schema(
            node_id="GGUFLoaderKJ",
            category="KJNodes/experimental",
            is_experimental=True,
            inputs=[
                io.Combo.Input("model_name", options=folder_paths.get_filename_list("unet_gguf")),
                io.Combo.Input("extra_model_name", options=folder_paths.get_filename_list("unet_gguf") + ["none"], default="none", tooltip="An extra GGUF model to load and merge into the main model, for example a VACE module."),
                io.Combo.Input("dequant_dtype", options=["default", "target", "float32", "float16", "bfloat16"], default="default"),
                io.Combo.Input("patch_dtype", options=["default", "target", "float32", "float16", "bfloat16"], default="default"),
                io.Boolean.Input("patch_on_device", default=False),
                io.Boolean.Input("enable_fp16_accumulation", default=False, tooltip="Enable torch.backends.cuda.matmul.allow_fp16_accumulation, requires pytorch 2.7.1 or newer."),
                io.Combo.Input("attention_override", options=["none", "sdpa", "sageattn", "xformers", "flashattn"], default="none", tooltip="Overrides the attention implementation used, requires the respective library to be installed."),
            ],
            outputs=[io.Model.Output(),],
        )

    # These wrappers are stored in transformer_options and called in place of the default
    # optimized attention; each dispatches to the selected comfy attention backend.
    def attention_override_pytorch(func, *args, **kwargs):
        new_attention = comfy.ldm.modules.attention.attention_pytorch
        return new_attention.__wrapped__(*args, **kwargs)

    def attention_override_sage(func, *args, **kwargs):
        new_attention = comfy.ldm.modules.attention.attention_sage
        return new_attention.__wrapped__(*args, **kwargs)

    def attention_override_xformers(func, *args, **kwargs):
        new_attention = comfy.ldm.modules.attention.attention_xformers
        return new_attention.__wrapped__(*args, **kwargs)

    def attention_override_flash(func, *args, **kwargs):
        new_attention = comfy.ldm.modules.attention.attention_flash
        return new_attention.__wrapped__(*args, **kwargs)

    ATTENTION_OVERRIDES = {
        "sdpa": attention_override_pytorch,
        "sageattn": attention_override_sage,
        "xformers": attention_override_xformers,
        "flashattn": attention_override_flash,
    }

    @classmethod
    def execute(cls, model_name, extra_model_name, dequant_dtype, patch_dtype, patch_on_device, attention_override, enable_fp16_accumulation):
        gguf_nodes = cls._load_gguf_nodes()

        ops = gguf_nodes.ops.GGMLOps()

        def set_linear_dtype(attr, value):
            if value == "default":
                setattr(ops.Linear, attr, None)
            elif value == "target":
                setattr(ops.Linear, attr, value)
            else:
                setattr(ops.Linear, attr, getattr(torch, value))

        set_linear_dtype("dequant_dtype", dequant_dtype)
        set_linear_dtype("patch_dtype", patch_dtype)

        # init model
        model_path = folder_paths.get_full_path("unet", model_name)
        sd = gguf_nodes.loader.gguf_sd_loader(model_path)

        if extra_model_name is not None and extra_model_name != "none":
            if not extra_model_name.endswith(".gguf"):
                raise ValueError("Extra model must also be a .gguf file")
            extra_model_full_path = folder_paths.get_full_path("unet", extra_model_name)
            extra_model = gguf_nodes.loader.gguf_sd_loader(extra_model_full_path)
            sd.update(extra_model)

        model = comfy.sd.load_diffusion_model_state_dict(
            sd, model_options={"custom_operations": ops}
        )
        if model is None:
            raise RuntimeError(f"ERROR: Could not detect model type of: {model_path}")
        model = gguf_nodes.nodes.GGUFModelPatcher.clone(model)
        model.patch_on_device = patch_on_device

        # attention override
        if attention_override in cls.ATTENTION_OVERRIDES:
            model.model_options["transformer_options"]["optimized_attention_override"] = cls.ATTENTION_OVERRIDES[attention_override]

        if enable_fp16_accumulation:
            if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"):
                torch.backends.cuda.matmul.allow_fp16_accumulation = True
            else:
                raise RuntimeError("Failed to set fp16 accumulation, requires pytorch version 2.7.1 or higher")
        else:
            if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"):
                torch.backends.cuda.matmul.allow_fp16_accumulation = False

        return io.NodeOutput(model,)
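
Because execute is a classmethod, the loader can also be exercised directly for quick testing outside the graph; a hypothetical call, with placeholder file names standing in for whatever GGUF files are present in the models/unet folder:

    out = GGUFLoaderKJ.execute(
        model_name="wan2.1_t2v_14B_Q4_K_M.gguf",            # placeholder
        extra_model_name="wan2.1_vace_module_Q4_K_M.gguf",  # placeholder, or "none"
        dequant_dtype="default",
        patch_dtype="default",
        patch_on_device=False,
        attention_override="none",
        enable_fp16_accumulation=False,
    )  # returns io.NodeOutput wrapping the patched model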