From b6fd3ffd10cd367f80c44a1920151d65219b0f9d Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Mon, 21 Apr 2025 14:39:45 -0400 Subject: [PATCH 01/23] Populate AUTH_TOKEN_COMFY_ORG hidden input (#7709) --- execution.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/execution.py b/execution.py index d09102f55..feb61ae82 100644 --- a/execution.py +++ b/execution.py @@ -144,6 +144,8 @@ def get_input_data(inputs, class_def, unique_id, outputs=None, dynprompt=None, e input_data_all[x] = [extra_data.get('extra_pnginfo', None)] if h[x] == "UNIQUE_ID": input_data_all[x] = [unique_id] + if h[x] == "AUTH_TOKEN_COMFY_ORG": + input_data_all[x] = [extra_data.get("auth_token_comfy_org", None)] return input_data_all, missing_keys map_node_over_list = None #Don't hook this please From ce22f687cc35b4414d792dd75812446ef56aa627 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 21 Apr 2025 11:40:29 -0700 Subject: [PATCH 02/23] Support for WAN VACE preview model. (#7711) * Support for WAN VACE preview model. * Remove print. --- comfy/ldm/wan/model.py | 144 +++++++++++++++++++++++++++++++++++++- comfy/model_base.py | 28 ++++++++ comfy/model_detection.py | 11 ++- comfy/supported_models.py | 12 +++- comfy_extras/nodes_wan.py | 106 ++++++++++++++++++++++++++++ 5 files changed, 295 insertions(+), 6 deletions(-) diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py index 2a30497c5..5e7848bd5 100644 --- a/comfy/ldm/wan/model.py +++ b/comfy/ldm/wan/model.py @@ -220,6 +220,34 @@ class WanAttentionBlock(nn.Module): return x +class VaceWanAttentionBlock(WanAttentionBlock): + def __init__( + self, + cross_attn_type, + dim, + ffn_dim, + num_heads, + window_size=(-1, -1), + qk_norm=True, + cross_attn_norm=False, + eps=1e-6, + block_id=0, + operation_settings={} + ): + super().__init__(cross_attn_type, dim, ffn_dim, num_heads, window_size, qk_norm, cross_attn_norm, eps, operation_settings=operation_settings) + self.block_id = block_id + if block_id == 0: + self.before_proj = operation_settings.get("operations").Linear(self.dim, self.dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) + self.after_proj = operation_settings.get("operations").Linear(self.dim, self.dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) + + def forward(self, c, x, **kwargs): + if self.block_id == 0: + c = self.before_proj(c) + x + c = super().forward(c, **kwargs) + c_skip = self.after_proj(c) + return c_skip, c + + class Head(nn.Module): def __init__(self, dim, out_dim, patch_size, eps=1e-6, operation_settings={}): @@ -395,6 +423,7 @@ class WanModel(torch.nn.Module): clip_fea=None, freqs=None, transformer_options={}, + **kwargs, ): r""" Forward pass through the diffusion model @@ -457,7 +486,7 @@ class WanModel(torch.nn.Module): x = self.unpatchify(x, grid_sizes) return x - def forward(self, x, timestep, context, clip_fea=None, transformer_options={},**kwargs): + def forward(self, x, timestep, context, clip_fea=None, transformer_options={}, **kwargs): bs, c, t, h, w = x.shape x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size) patch_size = self.patch_size @@ -471,7 +500,7 @@ class WanModel(torch.nn.Module): img_ids = repeat(img_ids, "t h w c -> b (t h w) c", b=bs) freqs = self.rope_embedder(img_ids).movedim(1, 2) - return self.forward_orig(x, timestep, context, clip_fea=clip_fea, freqs=freqs, transformer_options=transformer_options)[:, :, :t, :h, :w] + return self.forward_orig(x, timestep, context, 
clip_fea=clip_fea, freqs=freqs, transformer_options=transformer_options, **kwargs)[:, :, :t, :h, :w] def unpatchify(self, x, grid_sizes): r""" @@ -496,3 +525,114 @@ class WanModel(torch.nn.Module): u = torch.einsum('bfhwpqrc->bcfphqwr', u) u = u.reshape(b, c, *[i * j for i, j in zip(grid_sizes, self.patch_size)]) return u + + +class VaceWanModel(WanModel): + r""" + Wan diffusion backbone supporting both text-to-video and image-to-video. + """ + + def __init__(self, + model_type='vace', + patch_size=(1, 2, 2), + text_len=512, + in_dim=16, + dim=2048, + ffn_dim=8192, + freq_dim=256, + text_dim=4096, + out_dim=16, + num_heads=16, + num_layers=32, + window_size=(-1, -1), + qk_norm=True, + cross_attn_norm=True, + eps=1e-6, + flf_pos_embed_token_number=None, + image_model=None, + vace_layers=None, + vace_in_dim=None, + device=None, + dtype=None, + operations=None, + ): + + super().__init__(model_type='t2v', patch_size=patch_size, text_len=text_len, in_dim=in_dim, dim=dim, ffn_dim=ffn_dim, freq_dim=freq_dim, text_dim=text_dim, out_dim=out_dim, num_heads=num_heads, num_layers=num_layers, window_size=window_size, qk_norm=qk_norm, cross_attn_norm=cross_attn_norm, eps=eps, flf_pos_embed_token_number=flf_pos_embed_token_number, image_model=image_model, device=device, dtype=dtype, operations=operations) + operation_settings = {"operations": operations, "device": device, "dtype": dtype} + + # Vace + if vace_layers is not None: + self.vace_layers = vace_layers + self.vace_in_dim = vace_in_dim + # vace blocks + self.vace_blocks = nn.ModuleList([ + VaceWanAttentionBlock('t2v_cross_attn', self.dim, self.ffn_dim, self.num_heads, self.window_size, self.qk_norm, self.cross_attn_norm, self.eps, block_id=i, operation_settings=operation_settings) + for i in range(self.vace_layers) + ]) + + self.vace_layers_mapping = {i: n for n, i in enumerate(range(0, self.num_layers, self.num_layers // self.vace_layers))} + # vace patch embeddings + self.vace_patch_embedding = operations.Conv3d( + self.vace_in_dim, self.dim, kernel_size=self.patch_size, stride=self.patch_size, device=device, dtype=torch.float32 + ) + + def forward_orig( + self, + x, + t, + context, + vace_context, + clip_fea=None, + freqs=None, + transformer_options={}, + **kwargs, + ): + # embeddings + x = self.patch_embedding(x.float()).to(x.dtype) + grid_sizes = x.shape[2:] + x = x.flatten(2).transpose(1, 2) + + # time embeddings + e = self.time_embedding( + sinusoidal_embedding_1d(self.freq_dim, t).to(dtype=x[0].dtype)) + e0 = self.time_projection(e).unflatten(1, (6, self.dim)) + + # context + context = self.text_embedding(context) + + context_img_len = None + if clip_fea is not None: + if self.img_emb is not None: + context_clip = self.img_emb(clip_fea) # bs x 257 x dim + context = torch.concat([context_clip, context], dim=1) + context_img_len = clip_fea.shape[-2] + + c = self.vace_patch_embedding(vace_context.float()).to(vace_context.dtype) + c = c.flatten(2).transpose(1, 2) + + # arguments + x_orig = x + + patches_replace = transformer_options.get("patches_replace", {}) + blocks_replace = patches_replace.get("dit", {}) + for i, block in enumerate(self.blocks): + if ("double_block", i) in blocks_replace: + def block_wrap(args): + out = {} + out["img"] = block(args["img"], context=args["txt"], e=args["vec"], freqs=args["pe"], context_img_len=context_img_len) + return out + out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": e0, "pe": freqs}, {"original_block": block_wrap}) + x = out["img"] + else: + x = block(x, e=e0, freqs=freqs, 
context=context, context_img_len=context_img_len) + + ii = self.vace_layers_mapping.get(i, None) + if ii is not None: + c_skip, c = self.vace_blocks[ii](c, x=x_orig, e=e0, freqs=freqs, context=context, context_img_len=context_img_len) + x += c_skip + # head + x = self.head(x, e) + + # unpatchify + x = self.unpatchify(x, grid_sizes) + return x diff --git a/comfy/model_base.py b/comfy/model_base.py index 8dab1740b..04a101526 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -1043,6 +1043,34 @@ class WAN21(BaseModel): out['clip_fea'] = comfy.conds.CONDRegular(clip_vision_output.penultimate_hidden_states) return out + +class WAN21_Vace(WAN21): + def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None): + super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.VaceWanModel) + self.image_to_video = image_to_video + + def extra_conds(self, **kwargs): + out = super().extra_conds(**kwargs) + noise = kwargs.get("noise", None) + noise_shape = list(noise.shape) + vace_frames = kwargs.get("vace_frames", None) + if vace_frames is None: + noise_shape[1] = 32 + vace_frames = torch.zeros(noise_shape, device=noise.device, dtype=noise.dtype) + + for i in range(0, vace_frames.shape[1], 16): + vace_frames = vace_frames.clone() + vace_frames[:, i:i + 16] = self.process_latent_in(vace_frames[:, i:i + 16]) + + mask = kwargs.get("vace_mask", None) + if mask is None: + noise_shape[1] = 64 + mask = torch.ones(noise_shape, device=noise.device, dtype=noise.dtype) + + out['vace_context'] = comfy.conds.CONDRegular(torch.cat([vace_frames.to(noise), mask.to(noise)], dim=1)) + return out + + class Hunyuan3Dv2(BaseModel): def __init__(self, model_config, model_type=ModelType.FLOW, device=None): super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan3d.model.Hunyuan3Dv2) diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 6499bf238..76de78a8a 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -317,10 +317,15 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["cross_attn_norm"] = True dit_config["eps"] = 1e-6 dit_config["in_dim"] = state_dict['{}patch_embedding.weight'.format(key_prefix)].shape[1] - if '{}img_emb.proj.0.bias'.format(key_prefix) in state_dict_keys: - dit_config["model_type"] = "i2v" + if '{}vace_patch_embedding.weight'.format(key_prefix) in state_dict_keys: + dit_config["model_type"] = "vace" + dit_config["vace_in_dim"] = state_dict['{}vace_patch_embedding.weight'.format(key_prefix)].shape[1] + dit_config["vace_layers"] = count_blocks(state_dict_keys, '{}vace_blocks.'.format(key_prefix) + '{}.') else: - dit_config["model_type"] = "t2v" + if '{}img_emb.proj.0.bias'.format(key_prefix) in state_dict_keys: + dit_config["model_type"] = "i2v" + else: + dit_config["model_type"] = "t2v" flf_weight = state_dict.get('{}img_emb.emb_pos'.format(key_prefix)) if flf_weight is not None: dit_config["flf_pos_embed_token_number"] = flf_weight.shape[1] diff --git a/comfy/supported_models.py b/comfy/supported_models.py index 81c47ac68..5e55035cf 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -987,6 +987,16 @@ class WAN21_FunControl2V(WAN21_T2V): out = model_base.WAN21(self, image_to_video=False, device=device) return out +class WAN21_Vace(WAN21_T2V): + unet_config = { + "image_model": "wan2.1", + "model_type": "vace", + } + + def get_model(self, state_dict, prefix="", device=None): + out = 
model_base.WAN21_Vace(self, image_to_video=False, device=device) + return out + class Hunyuan3Dv2(supported_models_base.BASE): unet_config = { "image_model": "hunyuan3d2", @@ -1055,6 +1065,6 @@ class HiDream(supported_models_base.BASE): return None # TODO -models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream] +models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream] models += [SVD_img2vid] diff --git a/comfy_extras/nodes_wan.py b/comfy_extras/nodes_wan.py index 8ad358ce8..19a6cdfa4 100644 --- a/comfy_extras/nodes_wan.py +++ b/comfy_extras/nodes_wan.py @@ -193,9 +193,115 @@ class WanFunInpaintToVideo: return flfv.encode(positive, negative, vae, width, height, length, batch_size, start_image=start_image, end_image=end_image, clip_vision_start_image=clip_vision_output) +class WanVaceToVideo: + @classmethod + def INPUT_TYPES(s): + return {"required": {"positive": ("CONDITIONING", ), + "negative": ("CONDITIONING", ), + "vae": ("VAE", ), + "width": ("INT", {"default": 832, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}), + "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}), + "length": ("INT", {"default": 81, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}), + "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), + }, + "optional": {"control_video": ("IMAGE", ), + "control_masks": ("MASK", ), + "reference_image": ("IMAGE", ), + }} + + RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT", "INT") + RETURN_NAMES = ("positive", "negative", "latent", "trim_latent") + FUNCTION = "encode" + + CATEGORY = "conditioning/video_models" + + EXPERIMENTAL = True + + def encode(self, positive, negative, vae, width, height, length, batch_size, control_video=None, control_masks=None, reference_image=None): + latent_length = ((length - 1) // 4) + 1 + if control_video is not None: + control_video = comfy.utils.common_upscale(control_video[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1) + if control_video.shape[0] < length: + control_video = torch.nn.functional.pad(control_video, (0, 0, 0, 0, 0, 0, 0, length - control_video.shape[0]), value=0.5) + else: + control_video = torch.ones((length, height, width, 3)) * 0.5 + + if reference_image is not None: + reference_image = comfy.utils.common_upscale(reference_image[:1].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1) + reference_image = vae.encode(reference_image[:, :, :, :3]) + reference_image = torch.cat([reference_image, comfy.latent_formats.Wan21().process_out(torch.zeros_like(reference_image))], 
dim=1) + + if control_masks is None: + mask = torch.ones((length, height, width, 1)) + else: + mask = control_masks + if mask.ndim == 3: + mask = mask.unsqueeze(1) + mask = comfy.utils.common_upscale(mask[:length], width, height, "bilinear", "center").movedim(1, -1) + if mask.shape[0] < length: + mask = torch.nn.functional.pad(mask, (0, 0, 0, 0, 0, 0, 0, length - mask.shape[0]), value=1.0) + + control_video = control_video - 0.5 + inactive = (control_video * (1 - mask)) + 0.5 + reactive = (control_video * mask) + 0.5 + + inactive = vae.encode(inactive[:, :, :, :3]) + reactive = vae.encode(reactive[:, :, :, :3]) + control_video_latent = torch.cat((inactive, reactive), dim=1) + if reference_image is not None: + control_video_latent = torch.cat((reference_image, control_video_latent), dim=2) + + vae_stride = 8 + height_mask = height // vae_stride + width_mask = width // vae_stride + mask = mask.view(length, height_mask, vae_stride, width_mask, vae_stride) + mask = mask.permute(2, 4, 0, 1, 3) + mask = mask.reshape(vae_stride * vae_stride, length, height_mask, width_mask) + mask = torch.nn.functional.interpolate(mask.unsqueeze(0), size=(latent_length, height_mask, width_mask), mode='nearest-exact').squeeze(0) + + trim_latent = 0 + if reference_image is not None: + mask_pad = torch.zeros_like(mask[:, :reference_image.shape[2], :, :]) + mask = torch.cat((mask_pad, mask), dim=1) + latent_length += reference_image.shape[2] + trim_latent = reference_image.shape[2] + + mask = mask.unsqueeze(0) + positive = node_helpers.conditioning_set_values(positive, {"vace_frames": control_video_latent, "vace_mask": mask}) + negative = node_helpers.conditioning_set_values(negative, {"vace_frames": control_video_latent, "vace_mask": mask}) + + latent = torch.zeros([batch_size, 16, latent_length, height // 8, width // 8], device=comfy.model_management.intermediate_device()) + out_latent = {} + out_latent["samples"] = latent + return (positive, negative, out_latent, trim_latent) + +class TrimVideoLatent: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT",), + "trim_amount": ("INT", {"default": 0, "min": 0, "max": 99999}), + }} + + RETURN_TYPES = ("LATENT",) + FUNCTION = "op" + + CATEGORY = "latent/video" + + EXPERIMENTAL = True + + def op(self, samples, trim_amount): + samples_out = samples.copy() + + s1 = samples["samples"] + samples_out["samples"] = s1[:, :, trim_amount:] + return (samples_out,) + + NODE_CLASS_MAPPINGS = { "WanImageToVideo": WanImageToVideo, "WanFunControlToVideo": WanFunControlToVideo, "WanFunInpaintToVideo": WanFunInpaintToVideo, "WanFirstLastFrameToVideo": WanFirstLastFrameToVideo, + "WanVaceToVideo": WanVaceToVideo, + "TrimVideoLatent": TrimVideoLatent, } From 5d51794607d71e1bbffd7d9d5a1eed417de771ae Mon Sep 17 00:00:00 2001 From: filtered <176114999+webfiltered@users.noreply.github.com> Date: Tue, 22 Apr 2025 06:13:00 +1000 Subject: [PATCH 03/23] Add node type hint for socketless option (#7714) * Add node type hint for socketless option * nit - Doc --- comfy/comfy_types/node_typing.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/comfy/comfy_types/node_typing.py b/comfy/comfy_types/node_typing.py index 42ed5174e..a348791a9 100644 --- a/comfy/comfy_types/node_typing.py +++ b/comfy/comfy_types/node_typing.py @@ -115,6 +115,11 @@ class InputTypeOptions(TypedDict): """When a link exists, rather than receiving the evaluated value, you will receive the link (i.e. `["nodeId", ]`). 
Designed for node expansion.""" tooltip: NotRequired[str] """Tooltip for the input (or widget), shown on pointer hover""" + socketless: NotRequired[bool] + """All inputs (including widgets) have an input socket to connect links. When ``true``, if there is a widget for this input, no socket will be created. + Available from frontend v1.17.5 + Ref: https://github.com/Comfy-Org/ComfyUI_frontend/pull/3548 + """ # class InputTypeNumber(InputTypeOptions): # default: float | int min: NotRequired[float] From 9d57b8afd8c9f14776b1464919472ae17de2b03e Mon Sep 17 00:00:00 2001 From: "Alexander G. Morano" Date: Mon, 21 Apr 2025 18:51:31 -0400 Subject: [PATCH 04/23] Update nodes_primitive.py (#7716) Allow FLOAT and INT types to support negative numbers. Caps the numbers at the user's own system min and max. --- comfy_extras/nodes_primitive.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/comfy_extras/nodes_primitive.py b/comfy_extras/nodes_primitive.py index b770104fb..184b990c3 100644 --- a/comfy_extras/nodes_primitive.py +++ b/comfy_extras/nodes_primitive.py @@ -1,6 +1,8 @@ # Primitive nodes that are evaluated at backend. from __future__ import annotations +import sys + from comfy.comfy_types.node_typing import ComfyNodeABC, InputTypeDict, IO @@ -23,7 +25,7 @@ class Int(ComfyNodeABC): @classmethod def INPUT_TYPES(cls) -> InputTypeDict: return { - "required": {"value": (IO.INT, {"control_after_generate": True})}, + "required": {"value": (IO.INT, {"min": -sys.maxsize, "max": sys.maxsize, "control_after_generate": True})}, } RETURN_TYPES = (IO.INT,) @@ -38,7 +40,7 @@ class Float(ComfyNodeABC): @classmethod def INPUT_TYPES(cls) -> InputTypeDict: return { - "required": {"value": (IO.FLOAT, {})}, + "required": {"value": (IO.FLOAT, {"min": -sys.maxsize, "max": sys.maxsize})}, } RETURN_TYPES = (IO.FLOAT,) From 5d0d4ee98a24b6c72c94635fc5a6e93af2b005bc Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 21 Apr 2025 16:36:20 -0700 Subject: [PATCH 05/23] Add strength control for vace. 
(#7717) --- comfy/ldm/wan/model.py | 3 ++- comfy/model_base.py | 3 +++ comfy_extras/nodes_wan.py | 7 ++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py index 5e7848bd5..4ef86d5f2 100644 --- a/comfy/ldm/wan/model.py +++ b/comfy/ldm/wan/model.py @@ -582,6 +582,7 @@ class VaceWanModel(WanModel): t, context, vace_context, + vace_strength=1.0, clip_fea=None, freqs=None, transformer_options={}, @@ -629,7 +630,7 @@ class VaceWanModel(WanModel): ii = self.vace_layers_mapping.get(i, None) if ii is not None: c_skip, c = self.vace_blocks[ii](c, x=x_orig, e=e0, freqs=freqs, context=context, context_img_len=context_img_len) - x += c_skip + x += c_skip * vace_strength # head x = self.head(x, e) diff --git a/comfy/model_base.py b/comfy/model_base.py index 04a101526..b0c6a465b 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -1068,6 +1068,9 @@ class WAN21_Vace(WAN21): mask = torch.ones(noise_shape, device=noise.device, dtype=noise.dtype) out['vace_context'] = comfy.conds.CONDRegular(torch.cat([vace_frames.to(noise), mask.to(noise)], dim=1)) + + vace_strength = kwargs.get("vace_strength", 1.0) + out['vace_strength'] = comfy.conds.CONDConstant(vace_strength) return out diff --git a/comfy_extras/nodes_wan.py b/comfy_extras/nodes_wan.py index 19a6cdfa4..9dda64597 100644 --- a/comfy_extras/nodes_wan.py +++ b/comfy_extras/nodes_wan.py @@ -203,6 +203,7 @@ class WanVaceToVideo: "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}), "length": ("INT", {"default": 81, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}), "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), + "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1000.0, "step": 0.01}), }, "optional": {"control_video": ("IMAGE", ), "control_masks": ("MASK", ), @@ -217,7 +218,7 @@ class WanVaceToVideo: EXPERIMENTAL = True - def encode(self, positive, negative, vae, width, height, length, batch_size, control_video=None, control_masks=None, reference_image=None): + def encode(self, positive, negative, vae, width, height, length, batch_size, strength, control_video=None, control_masks=None, reference_image=None): latent_length = ((length - 1) // 4) + 1 if control_video is not None: control_video = comfy.utils.common_upscale(control_video[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1) @@ -267,8 +268,8 @@ class WanVaceToVideo: trim_latent = reference_image.shape[2] mask = mask.unsqueeze(0) - positive = node_helpers.conditioning_set_values(positive, {"vace_frames": control_video_latent, "vace_mask": mask}) - negative = node_helpers.conditioning_set_values(negative, {"vace_frames": control_video_latent, "vace_mask": mask}) + positive = node_helpers.conditioning_set_values(positive, {"vace_frames": control_video_latent, "vace_mask": mask, "vace_strength": strength}) + negative = node_helpers.conditioning_set_values(negative, {"vace_frames": control_video_latent, "vace_mask": mask, "vace_strength": strength}) latent = torch.zeros([batch_size, 16, latent_length, height // 8, width // 8], device=comfy.model_management.intermediate_device()) out_latent = {} From 1f3fba2af518073551a73582c8dce7bae4ad7716 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Tue, 22 Apr 2025 08:15:32 +0800 Subject: [PATCH 06/23] Unified Weight Adapter system for better maintainability and future feature of Lora system (#7540) --- comfy/lora.py | 321 ++----------------------------- 
comfy/weight_adapter/__init__.py | 13 ++ comfy/weight_adapter/base.py | 94 +++++++++ comfy/weight_adapter/glora.py | 93 +++++++++ comfy/weight_adapter/loha.py | 100 ++++++++++ comfy/weight_adapter/lokr.py | 133 +++++++++++++ comfy/weight_adapter/lora.py | 142 ++++++++++++++ 7 files changed, 592 insertions(+), 304 deletions(-) create mode 100644 comfy/weight_adapter/__init__.py create mode 100644 comfy/weight_adapter/base.py create mode 100644 comfy/weight_adapter/glora.py create mode 100644 comfy/weight_adapter/loha.py create mode 100644 comfy/weight_adapter/lokr.py create mode 100644 comfy/weight_adapter/lora.py diff --git a/comfy/lora.py b/comfy/lora.py index bc9f3022a..8760a21fb 100644 --- a/comfy/lora.py +++ b/comfy/lora.py @@ -20,6 +20,7 @@ from __future__ import annotations import comfy.utils import comfy.model_management import comfy.model_base +import comfy.weight_adapter as weight_adapter import logging import torch @@ -49,139 +50,12 @@ def load_lora(lora, to_load, log_missing=True): dora_scale = lora[dora_scale_name] loaded_keys.add(dora_scale_name) - reshape_name = "{}.reshape_weight".format(x) - reshape = None - if reshape_name in lora.keys(): - try: - reshape = lora[reshape_name].tolist() - loaded_keys.add(reshape_name) - except: - pass - - regular_lora = "{}.lora_up.weight".format(x) - diffusers_lora = "{}_lora.up.weight".format(x) - diffusers2_lora = "{}.lora_B.weight".format(x) - diffusers3_lora = "{}.lora.up.weight".format(x) - mochi_lora = "{}.lora_B".format(x) - transformers_lora = "{}.lora_linear_layer.up.weight".format(x) - A_name = None - - if regular_lora in lora.keys(): - A_name = regular_lora - B_name = "{}.lora_down.weight".format(x) - mid_name = "{}.lora_mid.weight".format(x) - elif diffusers_lora in lora.keys(): - A_name = diffusers_lora - B_name = "{}_lora.down.weight".format(x) - mid_name = None - elif diffusers2_lora in lora.keys(): - A_name = diffusers2_lora - B_name = "{}.lora_A.weight".format(x) - mid_name = None - elif diffusers3_lora in lora.keys(): - A_name = diffusers3_lora - B_name = "{}.lora.down.weight".format(x) - mid_name = None - elif mochi_lora in lora.keys(): - A_name = mochi_lora - B_name = "{}.lora_A".format(x) - mid_name = None - elif transformers_lora in lora.keys(): - A_name = transformers_lora - B_name ="{}.lora_linear_layer.down.weight".format(x) - mid_name = None - - if A_name is not None: - mid = None - if mid_name is not None and mid_name in lora.keys(): - mid = lora[mid_name] - loaded_keys.add(mid_name) - patch_dict[to_load[x]] = ("lora", (lora[A_name], lora[B_name], alpha, mid, dora_scale, reshape)) - loaded_keys.add(A_name) - loaded_keys.add(B_name) - - - ######## loha - hada_w1_a_name = "{}.hada_w1_a".format(x) - hada_w1_b_name = "{}.hada_w1_b".format(x) - hada_w2_a_name = "{}.hada_w2_a".format(x) - hada_w2_b_name = "{}.hada_w2_b".format(x) - hada_t1_name = "{}.hada_t1".format(x) - hada_t2_name = "{}.hada_t2".format(x) - if hada_w1_a_name in lora.keys(): - hada_t1 = None - hada_t2 = None - if hada_t1_name in lora.keys(): - hada_t1 = lora[hada_t1_name] - hada_t2 = lora[hada_t2_name] - loaded_keys.add(hada_t1_name) - loaded_keys.add(hada_t2_name) - - patch_dict[to_load[x]] = ("loha", (lora[hada_w1_a_name], lora[hada_w1_b_name], alpha, lora[hada_w2_a_name], lora[hada_w2_b_name], hada_t1, hada_t2, dora_scale)) - loaded_keys.add(hada_w1_a_name) - loaded_keys.add(hada_w1_b_name) - loaded_keys.add(hada_w2_a_name) - loaded_keys.add(hada_w2_b_name) - - - ######## lokr - lokr_w1_name = "{}.lokr_w1".format(x) - lokr_w2_name = 
"{}.lokr_w2".format(x) - lokr_w1_a_name = "{}.lokr_w1_a".format(x) - lokr_w1_b_name = "{}.lokr_w1_b".format(x) - lokr_t2_name = "{}.lokr_t2".format(x) - lokr_w2_a_name = "{}.lokr_w2_a".format(x) - lokr_w2_b_name = "{}.lokr_w2_b".format(x) - - lokr_w1 = None - if lokr_w1_name in lora.keys(): - lokr_w1 = lora[lokr_w1_name] - loaded_keys.add(lokr_w1_name) - - lokr_w2 = None - if lokr_w2_name in lora.keys(): - lokr_w2 = lora[lokr_w2_name] - loaded_keys.add(lokr_w2_name) - - lokr_w1_a = None - if lokr_w1_a_name in lora.keys(): - lokr_w1_a = lora[lokr_w1_a_name] - loaded_keys.add(lokr_w1_a_name) - - lokr_w1_b = None - if lokr_w1_b_name in lora.keys(): - lokr_w1_b = lora[lokr_w1_b_name] - loaded_keys.add(lokr_w1_b_name) - - lokr_w2_a = None - if lokr_w2_a_name in lora.keys(): - lokr_w2_a = lora[lokr_w2_a_name] - loaded_keys.add(lokr_w2_a_name) - - lokr_w2_b = None - if lokr_w2_b_name in lora.keys(): - lokr_w2_b = lora[lokr_w2_b_name] - loaded_keys.add(lokr_w2_b_name) - - lokr_t2 = None - if lokr_t2_name in lora.keys(): - lokr_t2 = lora[lokr_t2_name] - loaded_keys.add(lokr_t2_name) - - if (lokr_w1 is not None) or (lokr_w2 is not None) or (lokr_w1_a is not None) or (lokr_w2_a is not None): - patch_dict[to_load[x]] = ("lokr", (lokr_w1, lokr_w2, alpha, lokr_w1_a, lokr_w1_b, lokr_w2_a, lokr_w2_b, lokr_t2, dora_scale)) - - #glora - a1_name = "{}.a1.weight".format(x) - a2_name = "{}.a2.weight".format(x) - b1_name = "{}.b1.weight".format(x) - b2_name = "{}.b2.weight".format(x) - if a1_name in lora: - patch_dict[to_load[x]] = ("glora", (lora[a1_name], lora[a2_name], lora[b1_name], lora[b2_name], alpha, dora_scale)) - loaded_keys.add(a1_name) - loaded_keys.add(a2_name) - loaded_keys.add(b1_name) - loaded_keys.add(b2_name) + for adapter_cls in weight_adapter.adapters: + adapter = adapter_cls.load(x, lora, alpha, dora_scale, loaded_keys) + if adapter is not None: + patch_dict[to_load[x]] = adapter + loaded_keys.update(adapter.loaded_keys) + continue w_norm_name = "{}.w_norm".format(x) b_norm_name = "{}.b_norm".format(x) @@ -408,26 +282,6 @@ def model_lora_keys_unet(model, key_map={}): return key_map -def weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function): - dora_scale = comfy.model_management.cast_to_device(dora_scale, weight.device, intermediate_dtype) - lora_diff *= alpha - weight_calc = weight + function(lora_diff).type(weight.dtype) - weight_norm = ( - weight_calc.transpose(0, 1) - .reshape(weight_calc.shape[1], -1) - .norm(dim=1, keepdim=True) - .reshape(weight_calc.shape[1], *[1] * (weight_calc.dim() - 1)) - .transpose(0, 1) - ) - - weight_calc *= (dora_scale / weight_norm).type(weight.dtype) - if strength != 1.0: - weight_calc -= weight - weight += strength * (weight_calc) - else: - weight[:] = weight_calc - return weight - def pad_tensor_to_shape(tensor: torch.Tensor, new_shape: list[int]) -> torch.Tensor: """ Pad a tensor to a new shape with zeros. 
@@ -482,6 +336,16 @@ def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32, ori if isinstance(v, list): v = (calculate_weight(v[1:], v[0][1](comfy.model_management.cast_to_device(v[0][0], weight.device, intermediate_dtype, copy=True), inplace=True), key, intermediate_dtype=intermediate_dtype), ) + if isinstance(v, weight_adapter.WeightAdapterBase): + output = v.calculate_weight(weight, key, strength, strength_model, offset, function, intermediate_dtype, original_weights) + if output is None: + logging.warning("Calculate Weight Failed: {} {}".format(v.name, key)) + else: + weight = output + if old_weight is not None: + weight = old_weight + continue + if len(v) == 1: patch_type = "diff" elif len(v) == 2: @@ -508,157 +372,6 @@ def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32, ori diff_weight = comfy.model_management.cast_to_device(target_weight, weight.device, intermediate_dtype) - \ comfy.model_management.cast_to_device(original_weights[key][0][0], weight.device, intermediate_dtype) weight += function(strength * comfy.model_management.cast_to_device(diff_weight, weight.device, weight.dtype)) - elif patch_type == "lora": #lora/locon - mat1 = comfy.model_management.cast_to_device(v[0], weight.device, intermediate_dtype) - mat2 = comfy.model_management.cast_to_device(v[1], weight.device, intermediate_dtype) - dora_scale = v[4] - reshape = v[5] - - if reshape is not None: - weight = pad_tensor_to_shape(weight, reshape) - - if v[2] is not None: - alpha = v[2] / mat2.shape[0] - else: - alpha = 1.0 - - if v[3] is not None: - #locon mid weights, hopefully the math is fine because I didn't properly test it - mat3 = comfy.model_management.cast_to_device(v[3], weight.device, intermediate_dtype) - final_shape = [mat2.shape[1], mat2.shape[0], mat3.shape[2], mat3.shape[3]] - mat2 = torch.mm(mat2.transpose(0, 1).flatten(start_dim=1), mat3.transpose(0, 1).flatten(start_dim=1)).reshape(final_shape).transpose(0, 1) - try: - lora_diff = torch.mm(mat1.flatten(start_dim=1), mat2.flatten(start_dim=1)).reshape(weight.shape) - if dora_scale is not None: - weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) - else: - weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) - except Exception as e: - logging.error("ERROR {} {} {}".format(patch_type, key, e)) - elif patch_type == "lokr": - w1 = v[0] - w2 = v[1] - w1_a = v[3] - w1_b = v[4] - w2_a = v[5] - w2_b = v[6] - t2 = v[7] - dora_scale = v[8] - dim = None - - if w1 is None: - dim = w1_b.shape[0] - w1 = torch.mm(comfy.model_management.cast_to_device(w1_a, weight.device, intermediate_dtype), - comfy.model_management.cast_to_device(w1_b, weight.device, intermediate_dtype)) - else: - w1 = comfy.model_management.cast_to_device(w1, weight.device, intermediate_dtype) - - if w2 is None: - dim = w2_b.shape[0] - if t2 is None: - w2 = torch.mm(comfy.model_management.cast_to_device(w2_a, weight.device, intermediate_dtype), - comfy.model_management.cast_to_device(w2_b, weight.device, intermediate_dtype)) - else: - w2 = torch.einsum('i j k l, j r, i p -> p r k l', - comfy.model_management.cast_to_device(t2, weight.device, intermediate_dtype), - comfy.model_management.cast_to_device(w2_b, weight.device, intermediate_dtype), - comfy.model_management.cast_to_device(w2_a, weight.device, intermediate_dtype)) - else: - w2 = comfy.model_management.cast_to_device(w2, weight.device, intermediate_dtype) - - if len(w2.shape) == 4: - w1 = w1.unsqueeze(2).unsqueeze(2) - if 
v[2] is not None and dim is not None: - alpha = v[2] / dim - else: - alpha = 1.0 - - try: - lora_diff = torch.kron(w1, w2).reshape(weight.shape) - if dora_scale is not None: - weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) - else: - weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) - except Exception as e: - logging.error("ERROR {} {} {}".format(patch_type, key, e)) - elif patch_type == "loha": - w1a = v[0] - w1b = v[1] - if v[2] is not None: - alpha = v[2] / w1b.shape[0] - else: - alpha = 1.0 - - w2a = v[3] - w2b = v[4] - dora_scale = v[7] - if v[5] is not None: #cp decomposition - t1 = v[5] - t2 = v[6] - m1 = torch.einsum('i j k l, j r, i p -> p r k l', - comfy.model_management.cast_to_device(t1, weight.device, intermediate_dtype), - comfy.model_management.cast_to_device(w1b, weight.device, intermediate_dtype), - comfy.model_management.cast_to_device(w1a, weight.device, intermediate_dtype)) - - m2 = torch.einsum('i j k l, j r, i p -> p r k l', - comfy.model_management.cast_to_device(t2, weight.device, intermediate_dtype), - comfy.model_management.cast_to_device(w2b, weight.device, intermediate_dtype), - comfy.model_management.cast_to_device(w2a, weight.device, intermediate_dtype)) - else: - m1 = torch.mm(comfy.model_management.cast_to_device(w1a, weight.device, intermediate_dtype), - comfy.model_management.cast_to_device(w1b, weight.device, intermediate_dtype)) - m2 = torch.mm(comfy.model_management.cast_to_device(w2a, weight.device, intermediate_dtype), - comfy.model_management.cast_to_device(w2b, weight.device, intermediate_dtype)) - - try: - lora_diff = (m1 * m2).reshape(weight.shape) - if dora_scale is not None: - weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) - else: - weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) - except Exception as e: - logging.error("ERROR {} {} {}".format(patch_type, key, e)) - elif patch_type == "glora": - dora_scale = v[5] - - old_glora = False - if v[3].shape[1] == v[2].shape[0] == v[0].shape[0] == v[1].shape[1]: - rank = v[0].shape[0] - old_glora = True - - if v[3].shape[0] == v[2].shape[1] == v[0].shape[1] == v[1].shape[0]: - if old_glora and v[1].shape[0] == weight.shape[0] and weight.shape[0] == weight.shape[1]: - pass - else: - old_glora = False - rank = v[1].shape[0] - - a1 = comfy.model_management.cast_to_device(v[0].flatten(start_dim=1), weight.device, intermediate_dtype) - a2 = comfy.model_management.cast_to_device(v[1].flatten(start_dim=1), weight.device, intermediate_dtype) - b1 = comfy.model_management.cast_to_device(v[2].flatten(start_dim=1), weight.device, intermediate_dtype) - b2 = comfy.model_management.cast_to_device(v[3].flatten(start_dim=1), weight.device, intermediate_dtype) - - if v[4] is not None: - alpha = v[4] / rank - else: - alpha = 1.0 - - try: - if old_glora: - lora_diff = (torch.mm(b2, b1) + torch.mm(torch.mm(weight.flatten(start_dim=1).to(dtype=intermediate_dtype), a2), a1)).reshape(weight.shape) #old lycoris glora - else: - if weight.dim() > 2: - lora_diff = torch.einsum("o i ..., i j -> o j ...", torch.einsum("o i ..., i j -> o j ...", weight.to(dtype=intermediate_dtype), a1), a2).reshape(weight.shape) - else: - lora_diff = torch.mm(torch.mm(weight.to(dtype=intermediate_dtype), a1), a2).reshape(weight.shape) - lora_diff += torch.mm(b1, b2).reshape(weight.shape) - - if dora_scale is not None: - weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, 
intermediate_dtype, function) - else: - weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) - except Exception as e: - logging.error("ERROR {} {} {}".format(patch_type, key, e)) else: logging.warning("patch type not recognized {} {}".format(patch_type, key)) diff --git a/comfy/weight_adapter/__init__.py b/comfy/weight_adapter/__init__.py new file mode 100644 index 000000000..e6cd805b6 --- /dev/null +++ b/comfy/weight_adapter/__init__.py @@ -0,0 +1,13 @@ +from .base import WeightAdapterBase +from .lora import LoRAAdapter +from .loha import LoHaAdapter +from .lokr import LoKrAdapter +from .glora import GLoRAAdapter + + +adapters: list[type[WeightAdapterBase]] = [ + LoRAAdapter, + LoHaAdapter, + LoKrAdapter, + GLoRAAdapter, +] diff --git a/comfy/weight_adapter/base.py b/comfy/weight_adapter/base.py new file mode 100644 index 000000000..54af3babe --- /dev/null +++ b/comfy/weight_adapter/base.py @@ -0,0 +1,94 @@ +from typing import Optional + +import torch +import torch.nn as nn + +import comfy.model_management + + +class WeightAdapterBase: + name: str + loaded_keys: set[str] + weights: list[torch.Tensor] + + @classmethod + def load(cls, x: str, lora: dict[str, torch.Tensor]) -> Optional["WeightAdapterBase"]: + raise NotImplementedError + + def to_train(self) -> "WeightAdapterTrainBase": + raise NotImplementedError + + def calculate_weight( + self, + weight, + key, + strength, + strength_model, + offset, + function, + intermediate_dtype=torch.float32, + original_weight=None, + ): + raise NotImplementedError + + +class WeightAdapterTrainBase(nn.Module): + def __init__(self): + super().__init__() + + # [TODO] Collaborate with LoRA training PR #7032 + + +def weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function): + dora_scale = comfy.model_management.cast_to_device(dora_scale, weight.device, intermediate_dtype) + lora_diff *= alpha + weight_calc = weight + function(lora_diff).type(weight.dtype) + weight_norm = ( + weight_calc.transpose(0, 1) + .reshape(weight_calc.shape[1], -1) + .norm(dim=1, keepdim=True) + .reshape(weight_calc.shape[1], *[1] * (weight_calc.dim() - 1)) + .transpose(0, 1) + ) + + weight_calc *= (dora_scale / weight_norm).type(weight.dtype) + if strength != 1.0: + weight_calc -= weight + weight += strength * (weight_calc) + else: + weight[:] = weight_calc + return weight + + +def pad_tensor_to_shape(tensor: torch.Tensor, new_shape: list[int]) -> torch.Tensor: + """ + Pad a tensor to a new shape with zeros. + + Args: + tensor (torch.Tensor): The original tensor to be padded. + new_shape (List[int]): The desired shape of the padded tensor. + + Returns: + torch.Tensor: A new tensor padded with zeros to the specified shape. + + Note: + If the new shape is smaller than the original tensor in any dimension, + the original tensor will be truncated in that dimension. 
+ """ + if any([new_shape[i] < tensor.shape[i] for i in range(len(new_shape))]): + raise ValueError("The new shape must be larger than the original tensor in all dimensions") + + if len(new_shape) != len(tensor.shape): + raise ValueError("The new shape must have the same number of dimensions as the original tensor") + + # Create a new tensor filled with zeros + padded_tensor = torch.zeros(new_shape, dtype=tensor.dtype, device=tensor.device) + + # Create slicing tuples for both tensors + orig_slices = tuple(slice(0, dim) for dim in tensor.shape) + new_slices = tuple(slice(0, dim) for dim in tensor.shape) + + # Copy the original tensor into the new tensor + padded_tensor[new_slices] = tensor[orig_slices] + + return padded_tensor diff --git a/comfy/weight_adapter/glora.py b/comfy/weight_adapter/glora.py new file mode 100644 index 000000000..939abbba5 --- /dev/null +++ b/comfy/weight_adapter/glora.py @@ -0,0 +1,93 @@ +import logging +from typing import Optional + +import torch +import comfy.model_management +from .base import WeightAdapterBase, weight_decompose + + +class GLoRAAdapter(WeightAdapterBase): + name = "glora" + + def __init__(self, loaded_keys, weights): + self.loaded_keys = loaded_keys + self.weights = weights + + @classmethod + def load( + cls, + x: str, + lora: dict[str, torch.Tensor], + alpha: float, + dora_scale: torch.Tensor, + loaded_keys: set[str] = None, + ) -> Optional["GLoRAAdapter"]: + if loaded_keys is None: + loaded_keys = set() + a1_name = "{}.a1.weight".format(x) + a2_name = "{}.a2.weight".format(x) + b1_name = "{}.b1.weight".format(x) + b2_name = "{}.b2.weight".format(x) + if a1_name in lora: + weights = (lora[a1_name], lora[a2_name], lora[b1_name], lora[b2_name], alpha, dora_scale) + loaded_keys.add(a1_name) + loaded_keys.add(a2_name) + loaded_keys.add(b1_name) + loaded_keys.add(b2_name) + return cls(loaded_keys, weights) + else: + return None + + def calculate_weight( + self, + weight, + key, + strength, + strength_model, + offset, + function, + intermediate_dtype=torch.float32, + original_weight=None, + ): + v = self.weights + dora_scale = v[5] + + old_glora = False + if v[3].shape[1] == v[2].shape[0] == v[0].shape[0] == v[1].shape[1]: + rank = v[0].shape[0] + old_glora = True + + if v[3].shape[0] == v[2].shape[1] == v[0].shape[1] == v[1].shape[0]: + if old_glora and v[1].shape[0] == weight.shape[0] and weight.shape[0] == weight.shape[1]: + pass + else: + old_glora = False + rank = v[1].shape[0] + + a1 = comfy.model_management.cast_to_device(v[0].flatten(start_dim=1), weight.device, intermediate_dtype) + a2 = comfy.model_management.cast_to_device(v[1].flatten(start_dim=1), weight.device, intermediate_dtype) + b1 = comfy.model_management.cast_to_device(v[2].flatten(start_dim=1), weight.device, intermediate_dtype) + b2 = comfy.model_management.cast_to_device(v[3].flatten(start_dim=1), weight.device, intermediate_dtype) + + if v[4] is not None: + alpha = v[4] / rank + else: + alpha = 1.0 + + try: + if old_glora: + lora_diff = (torch.mm(b2, b1) + torch.mm(torch.mm(weight.flatten(start_dim=1).to(dtype=intermediate_dtype), a2), a1)).reshape(weight.shape) #old lycoris glora + else: + if weight.dim() > 2: + lora_diff = torch.einsum("o i ..., i j -> o j ...", torch.einsum("o i ..., i j -> o j ...", weight.to(dtype=intermediate_dtype), a1), a2).reshape(weight.shape) + else: + lora_diff = torch.mm(torch.mm(weight.to(dtype=intermediate_dtype), a1), a2).reshape(weight.shape) + lora_diff += torch.mm(b1, b2).reshape(weight.shape) + + if dora_scale is not None: + weight = 
weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) + else: + weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) + except Exception as e: + logging.error("ERROR {} {} {}".format(self.name, key, e)) + return weight diff --git a/comfy/weight_adapter/loha.py b/comfy/weight_adapter/loha.py new file mode 100644 index 000000000..ce79abad5 --- /dev/null +++ b/comfy/weight_adapter/loha.py @@ -0,0 +1,100 @@ +import logging +from typing import Optional + +import torch +import comfy.model_management +from .base import WeightAdapterBase, weight_decompose + + +class LoHaAdapter(WeightAdapterBase): + name = "loha" + + def __init__(self, loaded_keys, weights): + self.loaded_keys = loaded_keys + self.weights = weights + + @classmethod + def load( + cls, + x: str, + lora: dict[str, torch.Tensor], + alpha: float, + dora_scale: torch.Tensor, + loaded_keys: set[str] = None, + ) -> Optional["LoHaAdapter"]: + if loaded_keys is None: + loaded_keys = set() + + hada_w1_a_name = "{}.hada_w1_a".format(x) + hada_w1_b_name = "{}.hada_w1_b".format(x) + hada_w2_a_name = "{}.hada_w2_a".format(x) + hada_w2_b_name = "{}.hada_w2_b".format(x) + hada_t1_name = "{}.hada_t1".format(x) + hada_t2_name = "{}.hada_t2".format(x) + if hada_w1_a_name in lora.keys(): + hada_t1 = None + hada_t2 = None + if hada_t1_name in lora.keys(): + hada_t1 = lora[hada_t1_name] + hada_t2 = lora[hada_t2_name] + loaded_keys.add(hada_t1_name) + loaded_keys.add(hada_t2_name) + + weights = (lora[hada_w1_a_name], lora[hada_w1_b_name], alpha, lora[hada_w2_a_name], lora[hada_w2_b_name], hada_t1, hada_t2, dora_scale) + loaded_keys.add(hada_w1_a_name) + loaded_keys.add(hada_w1_b_name) + loaded_keys.add(hada_w2_a_name) + loaded_keys.add(hada_w2_b_name) + return cls(loaded_keys, weights) + else: + return None + + def calculate_weight( + self, + weight, + key, + strength, + strength_model, + offset, + function, + intermediate_dtype=torch.float32, + original_weight=None, + ): + v = self.weights + w1a = v[0] + w1b = v[1] + if v[2] is not None: + alpha = v[2] / w1b.shape[0] + else: + alpha = 1.0 + + w2a = v[3] + w2b = v[4] + dora_scale = v[7] + if v[5] is not None: #cp decomposition + t1 = v[5] + t2 = v[6] + m1 = torch.einsum('i j k l, j r, i p -> p r k l', + comfy.model_management.cast_to_device(t1, weight.device, intermediate_dtype), + comfy.model_management.cast_to_device(w1b, weight.device, intermediate_dtype), + comfy.model_management.cast_to_device(w1a, weight.device, intermediate_dtype)) + + m2 = torch.einsum('i j k l, j r, i p -> p r k l', + comfy.model_management.cast_to_device(t2, weight.device, intermediate_dtype), + comfy.model_management.cast_to_device(w2b, weight.device, intermediate_dtype), + comfy.model_management.cast_to_device(w2a, weight.device, intermediate_dtype)) + else: + m1 = torch.mm(comfy.model_management.cast_to_device(w1a, weight.device, intermediate_dtype), + comfy.model_management.cast_to_device(w1b, weight.device, intermediate_dtype)) + m2 = torch.mm(comfy.model_management.cast_to_device(w2a, weight.device, intermediate_dtype), + comfy.model_management.cast_to_device(w2b, weight.device, intermediate_dtype)) + + try: + lora_diff = (m1 * m2).reshape(weight.shape) + if dora_scale is not None: + weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) + else: + weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) + except Exception as e: + logging.error("ERROR {} {} {}".format(self.name, key, e)) + return 
weight diff --git a/comfy/weight_adapter/lokr.py b/comfy/weight_adapter/lokr.py new file mode 100644 index 000000000..51233db2d --- /dev/null +++ b/comfy/weight_adapter/lokr.py @@ -0,0 +1,133 @@ +import logging +from typing import Optional + +import torch +import comfy.model_management +from .base import WeightAdapterBase, weight_decompose + + +class LoKrAdapter(WeightAdapterBase): + name = "lokr" + + def __init__(self, loaded_keys, weights): + self.loaded_keys = loaded_keys + self.weights = weights + + @classmethod + def load( + cls, + x: str, + lora: dict[str, torch.Tensor], + alpha: float, + dora_scale: torch.Tensor, + loaded_keys: set[str] = None, + ) -> Optional["LoKrAdapter"]: + if loaded_keys is None: + loaded_keys = set() + lokr_w1_name = "{}.lokr_w1".format(x) + lokr_w2_name = "{}.lokr_w2".format(x) + lokr_w1_a_name = "{}.lokr_w1_a".format(x) + lokr_w1_b_name = "{}.lokr_w1_b".format(x) + lokr_t2_name = "{}.lokr_t2".format(x) + lokr_w2_a_name = "{}.lokr_w2_a".format(x) + lokr_w2_b_name = "{}.lokr_w2_b".format(x) + + lokr_w1 = None + if lokr_w1_name in lora.keys(): + lokr_w1 = lora[lokr_w1_name] + loaded_keys.add(lokr_w1_name) + + lokr_w2 = None + if lokr_w2_name in lora.keys(): + lokr_w2 = lora[lokr_w2_name] + loaded_keys.add(lokr_w2_name) + + lokr_w1_a = None + if lokr_w1_a_name in lora.keys(): + lokr_w1_a = lora[lokr_w1_a_name] + loaded_keys.add(lokr_w1_a_name) + + lokr_w1_b = None + if lokr_w1_b_name in lora.keys(): + lokr_w1_b = lora[lokr_w1_b_name] + loaded_keys.add(lokr_w1_b_name) + + lokr_w2_a = None + if lokr_w2_a_name in lora.keys(): + lokr_w2_a = lora[lokr_w2_a_name] + loaded_keys.add(lokr_w2_a_name) + + lokr_w2_b = None + if lokr_w2_b_name in lora.keys(): + lokr_w2_b = lora[lokr_w2_b_name] + loaded_keys.add(lokr_w2_b_name) + + lokr_t2 = None + if lokr_t2_name in lora.keys(): + lokr_t2 = lora[lokr_t2_name] + loaded_keys.add(lokr_t2_name) + + if (lokr_w1 is not None) or (lokr_w2 is not None) or (lokr_w1_a is not None) or (lokr_w2_a is not None): + weights = (lokr_w1, lokr_w2, alpha, lokr_w1_a, lokr_w1_b, lokr_w2_a, lokr_w2_b, lokr_t2, dora_scale) + return cls(loaded_keys, weights) + else: + return None + + def calculate_weight( + self, + weight, + key, + strength, + strength_model, + offset, + function, + intermediate_dtype=torch.float32, + original_weight=None, + ): + v = self.weights + w1 = v[0] + w2 = v[1] + w1_a = v[3] + w1_b = v[4] + w2_a = v[5] + w2_b = v[6] + t2 = v[7] + dora_scale = v[8] + dim = None + + if w1 is None: + dim = w1_b.shape[0] + w1 = torch.mm(comfy.model_management.cast_to_device(w1_a, weight.device, intermediate_dtype), + comfy.model_management.cast_to_device(w1_b, weight.device, intermediate_dtype)) + else: + w1 = comfy.model_management.cast_to_device(w1, weight.device, intermediate_dtype) + + if w2 is None: + dim = w2_b.shape[0] + if t2 is None: + w2 = torch.mm(comfy.model_management.cast_to_device(w2_a, weight.device, intermediate_dtype), + comfy.model_management.cast_to_device(w2_b, weight.device, intermediate_dtype)) + else: + w2 = torch.einsum('i j k l, j r, i p -> p r k l', + comfy.model_management.cast_to_device(t2, weight.device, intermediate_dtype), + comfy.model_management.cast_to_device(w2_b, weight.device, intermediate_dtype), + comfy.model_management.cast_to_device(w2_a, weight.device, intermediate_dtype)) + else: + w2 = comfy.model_management.cast_to_device(w2, weight.device, intermediate_dtype) + + if len(w2.shape) == 4: + w1 = w1.unsqueeze(2).unsqueeze(2) + if v[2] is not None and dim is not None: + alpha = v[2] / dim + else: + alpha 
= 1.0 + + try: + lora_diff = torch.kron(w1, w2).reshape(weight.shape) + if dora_scale is not None: + weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) + else: + weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) + except Exception as e: + logging.error("ERROR {} {} {}".format(self.name, key, e)) + return weight diff --git a/comfy/weight_adapter/lora.py b/comfy/weight_adapter/lora.py new file mode 100644 index 000000000..b2e623924 --- /dev/null +++ b/comfy/weight_adapter/lora.py @@ -0,0 +1,142 @@ +import logging +from typing import Optional + +import torch +import comfy.model_management +from .base import WeightAdapterBase, weight_decompose, pad_tensor_to_shape + + +class LoRAAdapter(WeightAdapterBase): + name = "lora" + + def __init__(self, loaded_keys, weights): + self.loaded_keys = loaded_keys + self.weights = weights + + @classmethod + def load( + cls, + x: str, + lora: dict[str, torch.Tensor], + alpha: float, + dora_scale: torch.Tensor, + loaded_keys: set[str] = None, + ) -> Optional["LoRAAdapter"]: + if loaded_keys is None: + loaded_keys = set() + + reshape_name = "{}.reshape_weight".format(x) + regular_lora = "{}.lora_up.weight".format(x) + diffusers_lora = "{}_lora.up.weight".format(x) + diffusers2_lora = "{}.lora_B.weight".format(x) + diffusers3_lora = "{}.lora.up.weight".format(x) + mochi_lora = "{}.lora_B".format(x) + transformers_lora = "{}.lora_linear_layer.up.weight".format(x) + A_name = None + + if regular_lora in lora.keys(): + A_name = regular_lora + B_name = "{}.lora_down.weight".format(x) + mid_name = "{}.lora_mid.weight".format(x) + elif diffusers_lora in lora.keys(): + A_name = diffusers_lora + B_name = "{}_lora.down.weight".format(x) + mid_name = None + elif diffusers2_lora in lora.keys(): + A_name = diffusers2_lora + B_name = "{}.lora_A.weight".format(x) + mid_name = None + elif diffusers3_lora in lora.keys(): + A_name = diffusers3_lora + B_name = "{}.lora.down.weight".format(x) + mid_name = None + elif mochi_lora in lora.keys(): + A_name = mochi_lora + B_name = "{}.lora_A".format(x) + mid_name = None + elif transformers_lora in lora.keys(): + A_name = transformers_lora + B_name = "{}.lora_linear_layer.down.weight".format(x) + mid_name = None + + if A_name is not None: + mid = None + if mid_name is not None and mid_name in lora.keys(): + mid = lora[mid_name] + loaded_keys.add(mid_name) + reshape = None + if reshape_name in lora.keys(): + try: + reshape = lora[reshape_name].tolist() + loaded_keys.add(reshape_name) + except: + pass + weights = (lora[A_name], lora[B_name], alpha, mid, dora_scale, reshape) + loaded_keys.add(A_name) + loaded_keys.add(B_name) + return cls(loaded_keys, weights) + else: + return None + + def calculate_weight( + self, + weight, + key, + strength, + strength_model, + offset, + function, + intermediate_dtype=torch.float32, + original_weight=None, + ): + v = self.weights + mat1 = comfy.model_management.cast_to_device( + v[0], weight.device, intermediate_dtype + ) + mat2 = comfy.model_management.cast_to_device( + v[1], weight.device, intermediate_dtype + ) + dora_scale = v[4] + reshape = v[5] + + if reshape is not None: + weight = pad_tensor_to_shape(weight, reshape) + + if v[2] is not None: + alpha = v[2] / mat2.shape[0] + else: + alpha = 1.0 + + if v[3] is not None: + # locon mid weights, hopefully the math is fine because I didn't properly test it + mat3 = comfy.model_management.cast_to_device( + v[3], weight.device, intermediate_dtype + ) + final_shape = [mat2.shape[1], 
mat2.shape[0], mat3.shape[2], mat3.shape[3]] + mat2 = ( + torch.mm( + mat2.transpose(0, 1).flatten(start_dim=1), + mat3.transpose(0, 1).flatten(start_dim=1), + ) + .reshape(final_shape) + .transpose(0, 1) + ) + try: + lora_diff = torch.mm( + mat1.flatten(start_dim=1), mat2.flatten(start_dim=1) + ).reshape(weight.shape) + if dora_scale is not None: + weight = weight_decompose( + dora_scale, + weight, + lora_diff, + alpha, + strength, + intermediate_dtype, + function, + ) + else: + weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) + except Exception as e: + logging.error("ERROR {} {} {}".format(self.name, key, e)) + return weight From 3ab231f01f26f9cec03bd94382ae5b6289789d9e Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 21 Apr 2025 20:36:12 -0700 Subject: [PATCH 07/23] Fix issue with WAN VACE implementation. (#7724) --- comfy/ldm/wan/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py index 4ef86d5f2..b8eec3afb 100644 --- a/comfy/ldm/wan/model.py +++ b/comfy/ldm/wan/model.py @@ -630,7 +630,7 @@ class VaceWanModel(WanModel): ii = self.vace_layers_mapping.get(i, None) if ii is not None: c_skip, c = self.vace_blocks[ii](c, x=x_orig, e=e0, freqs=freqs, context=context, context_img_len=context_img_len) - x += c_skip * vace_strength + x += c_skip * vace_strength # head x = self.head(x, e) From 966c43ce268341de6e60762ef18e7628f7d311bf Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Tue, 22 Apr 2025 16:59:47 +0800 Subject: [PATCH 08/23] Add OFT/BOFT algorithm in weight adapter (#7725) --- comfy/weight_adapter/__init__.py | 4 ++ comfy/weight_adapter/boft.py | 115 +++++++++++++++++++++++++++++++ comfy/weight_adapter/oft.py | 94 +++++++++++++++++++++++++ 3 files changed, 213 insertions(+) create mode 100644 comfy/weight_adapter/boft.py create mode 100644 comfy/weight_adapter/oft.py diff --git a/comfy/weight_adapter/__init__.py b/comfy/weight_adapter/__init__.py index e6cd805b6..d2a1d0151 100644 --- a/comfy/weight_adapter/__init__.py +++ b/comfy/weight_adapter/__init__.py @@ -3,6 +3,8 @@ from .lora import LoRAAdapter from .loha import LoHaAdapter from .lokr import LoKrAdapter from .glora import GLoRAAdapter +from .oft import OFTAdapter +from .boft import BOFTAdapter adapters: list[type[WeightAdapterBase]] = [ @@ -10,4 +12,6 @@ adapters: list[type[WeightAdapterBase]] = [ LoHaAdapter, LoKrAdapter, GLoRAAdapter, + OFTAdapter, + BOFTAdapter, ] diff --git a/comfy/weight_adapter/boft.py b/comfy/weight_adapter/boft.py new file mode 100644 index 000000000..c85adc7ab --- /dev/null +++ b/comfy/weight_adapter/boft.py @@ -0,0 +1,115 @@ +import logging +from typing import Optional + +import torch +import comfy.model_management +from .base import WeightAdapterBase, weight_decompose + + +class BOFTAdapter(WeightAdapterBase): + name = "boft" + + def __init__(self, loaded_keys, weights): + self.loaded_keys = loaded_keys + self.weights = weights + + @classmethod + def load( + cls, + x: str, + lora: dict[str, torch.Tensor], + alpha: float, + dora_scale: torch.Tensor, + loaded_keys: set[str] = None, + ) -> Optional["BOFTAdapter"]: + if loaded_keys is None: + loaded_keys = set() + blocks_name = "{}.boft_blocks".format(x) + rescale_name = "{}.rescale".format(x) + + blocks = None + if blocks_name in lora.keys(): + blocks = lora[blocks_name] + if blocks.ndim == 4: + loaded_keys.add(blocks_name) + + rescale = None + if 
rescale_name in lora.keys(): + rescale = lora[rescale_name] + loaded_keys.add(rescale_name) + + if blocks is not None: + weights = (blocks, rescale, alpha, dora_scale) + return cls(loaded_keys, weights) + else: + return None + + def calculate_weight( + self, + weight, + key, + strength, + strength_model, + offset, + function, + intermediate_dtype=torch.float32, + original_weight=None, + ): + v = self.weights + blocks = v[0] + rescale = v[1] + alpha = v[2] + dora_scale = v[3] + + blocks = comfy.model_management.cast_to_device(blocks, weight.device, intermediate_dtype) + if rescale is not None: + rescale = comfy.model_management.cast_to_device(rescale, weight.device, intermediate_dtype) + + boft_m, block_num, boft_b, *_ = blocks.shape + + try: + # Get r + I = torch.eye(boft_b, device=blocks.device, dtype=blocks.dtype) + # for Q = -Q^T + q = blocks - blocks.transpose(1, 2) + normed_q = q + if alpha > 0: # alpha in boft/bboft is for constraint + q_norm = torch.norm(q) + 1e-8 + if q_norm > alpha: + normed_q = q * alpha / q_norm + # use float() to prevent unsupported type in .inverse() + r = (I + normed_q) @ (I - normed_q).float().inverse() + r = r.to(original_weight) + + inp = org = original_weight + + r_b = boft_b//2 + for i in range(boft_m): + bi = r[i] + g = 2 + k = 2**i * r_b + if strength != 1: + bi = bi * strength + (1-strength) * I + inp = ( + inp.unflatten(-1, (-1, g, k)) + .transpose(-2, -1) + .flatten(-3) + .unflatten(-1, (-1, boft_b)) + ) + inp = torch.einsum("b n m, b n ... -> b m ...", inp, bi) + inp = ( + inp.flatten(-2).unflatten(-1, (-1, k, g)).transpose(-2, -1).flatten(-3) + ) + + if rescale is not None: + inp = inp * rescale + + lora_diff = inp - org + lora_diff = comfy.model_management.cast_to_device(lora_diff, weight.device, intermediate_dtype) + if dora_scale is not None: + weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) + else: + weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) + except Exception as e: + logging.error("ERROR {} {} {}".format(self.name, key, e)) + return weight diff --git a/comfy/weight_adapter/oft.py b/comfy/weight_adapter/oft.py new file mode 100644 index 000000000..0ea229b79 --- /dev/null +++ b/comfy/weight_adapter/oft.py @@ -0,0 +1,94 @@ +import logging +from typing import Optional + +import torch +import comfy.model_management +from .base import WeightAdapterBase, weight_decompose + + +class OFTAdapter(WeightAdapterBase): + name = "oft" + + def __init__(self, loaded_keys, weights): + self.loaded_keys = loaded_keys + self.weights = weights + + @classmethod + def load( + cls, + x: str, + lora: dict[str, torch.Tensor], + alpha: float, + dora_scale: torch.Tensor, + loaded_keys: set[str] = None, + ) -> Optional["OFTAdapter"]: + if loaded_keys is None: + loaded_keys = set() + blocks_name = "{}.oft_blocks".format(x) + rescale_name = "{}.rescale".format(x) + + blocks = None + if blocks_name in lora.keys(): + blocks = lora[blocks_name] + if blocks.ndim == 3: + loaded_keys.add(blocks_name) + + rescale = None + if rescale_name in lora.keys(): + rescale = lora[rescale_name] + loaded_keys.add(rescale_name) + + if blocks is not None: + weights = (blocks, rescale, alpha, dora_scale) + return cls(loaded_keys, weights) + else: + return None + + def calculate_weight( + self, + weight, + key, + strength, + strength_model, + offset, + function, + intermediate_dtype=torch.float32, + original_weight=None, + ): + v = self.weights + blocks = v[0] + rescale = v[1] + alpha = v[2] + dora_scale = v[3] 
+ + blocks = comfy.model_management.cast_to_device(blocks, weight.device, intermediate_dtype) + if rescale is not None: + rescale = comfy.model_management.cast_to_device(rescale, weight.device, intermediate_dtype) + + block_num, block_size, *_ = blocks.shape + + try: + # Get r + I = torch.eye(block_size, device=blocks.device, dtype=blocks.dtype) + # for Q = -Q^T + q = blocks - blocks.transpose(1, 2) + normed_q = q + if alpha > 0: # alpha in oft/boft is for constraint + q_norm = torch.norm(q) + 1e-8 + if q_norm > alpha: + normed_q = q * alpha / q_norm + # use float() to prevent unsupported type in .inverse() + r = (I + normed_q) @ (I - normed_q).float().inverse() + r = r.to(original_weight) + lora_diff = torch.einsum( + "k n m, k n ... -> k m ...", + (r * strength) - strength * I, + original_weight, + ) + if dora_scale is not None: + weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) + else: + weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) + except Exception as e: + logging.error("ERROR {} {} {}".format(self.name, key, e)) + return weight From 454a635c1b8aae9f635e7fb4f696bf7ac2e1fd1f Mon Sep 17 00:00:00 2001 From: Terry Jia Date: Tue, 22 Apr 2025 05:00:28 -0400 Subject: [PATCH 09/23] upstream MaskPreview from ComfyUI_essentials (#7719) --- comfy_extras/nodes_mask.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/comfy_extras/nodes_mask.py b/comfy_extras/nodes_mask.py index 13d2b4bab..99b264a32 100644 --- a/comfy_extras/nodes_mask.py +++ b/comfy_extras/nodes_mask.py @@ -3,7 +3,10 @@ import scipy.ndimage import torch import comfy.utils import node_helpers +import folder_paths +import random +import nodes from nodes import MAX_RESOLUTION def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False): @@ -362,6 +365,30 @@ class ThresholdMask: mask = (mask > value).float() return (mask,) +# Mask Preview - original implement from +# https://github.com/cubiq/ComfyUI_essentials/blob/9d9f4bedfc9f0321c19faf71855e228c93bd0dc9/mask.py#L81 +# upstream requested in https://github.com/Kosinkadink/rfcs/blob/main/rfcs/0000-corenodes.md#preview-nodes +class MaskPreview(nodes.SaveImage): + def __init__(self): + self.output_dir = folder_paths.get_temp_directory() + self.type = "temp" + self.prefix_append = "_temp_" + ''.join(random.choice("abcdefghijklmnopqrstupvxyz") for x in range(5)) + self.compress_level = 4 + + @classmethod + def INPUT_TYPES(s): + return { + "required": {"mask": ("MASK",), }, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, + } + + FUNCTION = "execute" + CATEGORY = "mask" + + def execute(self, mask, filename_prefix="ComfyUI", prompt=None, extra_pnginfo=None): + preview = mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])).movedim(1, -1).expand(-1, -1, -1, 3) + return self.save_images(preview, filename_prefix, prompt, extra_pnginfo) + NODE_CLASS_MAPPINGS = { "LatentCompositeMasked": LatentCompositeMasked, @@ -376,6 +403,7 @@ NODE_CLASS_MAPPINGS = { "FeatherMask": FeatherMask, "GrowMask": GrowMask, "ThresholdMask": ThresholdMask, + "MaskPreview": MaskPreview } NODE_DISPLAY_NAME_MAPPINGS = { From a8f63c0d5b40b4ed12faa1376e973b0e790b1c0d Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Tue, 22 Apr 2025 17:01:27 +0800 Subject: [PATCH 10/23] Support dora_scale on both axis (#7727) --- comfy/weight_adapter/base.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 
deletions(-) diff --git a/comfy/weight_adapter/base.py b/comfy/weight_adapter/base.py index 54af3babe..29873519d 100644 --- a/comfy/weight_adapter/base.py +++ b/comfy/weight_adapter/base.py @@ -43,13 +43,23 @@ def weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediat dora_scale = comfy.model_management.cast_to_device(dora_scale, weight.device, intermediate_dtype) lora_diff *= alpha weight_calc = weight + function(lora_diff).type(weight.dtype) - weight_norm = ( - weight_calc.transpose(0, 1) - .reshape(weight_calc.shape[1], -1) - .norm(dim=1, keepdim=True) - .reshape(weight_calc.shape[1], *[1] * (weight_calc.dim() - 1)) - .transpose(0, 1) - ) + + wd_on_output_axis = dora_scale.shape[0] == weight_calc.shape[0] + if wd_on_output_axis: + weight_norm = ( + weight.reshape(weight.shape[0], -1) + .norm(dim=1, keepdim=True) + .reshape(weight.shape[0], *[1] * (weight.dim() - 1)) + ) + else: + weight_norm = ( + weight_calc.transpose(0, 1) + .reshape(weight_calc.shape[1], -1) + .norm(dim=1, keepdim=True) + .reshape(weight_calc.shape[1], *[1] * (weight_calc.dim() - 1)) + .transpose(0, 1) + ) + weight_norm = weight_norm + torch.finfo(weight.dtype).eps weight_calc *= (dora_scale / weight_norm).type(weight.dtype) if strength != 1.0: From 2d6805ce57cede78acb6515112439c5092c7b257 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Tue, 22 Apr 2025 03:17:38 -0700 Subject: [PATCH 11/23] Add option for using fp8_e8m0fnu for model weights. (#7733) Seems to break every model I have tried but worth testing? --- comfy/cli_args.py | 1 + comfy/model_management.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 81f29f098..1b971be3c 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -66,6 +66,7 @@ fpunet_group.add_argument("--bf16-unet", action="store_true", help="Run the diff fpunet_group.add_argument("--fp16-unet", action="store_true", help="Run the diffusion model in fp16") fpunet_group.add_argument("--fp8_e4m3fn-unet", action="store_true", help="Store unet weights in fp8_e4m3fn.") fpunet_group.add_argument("--fp8_e5m2-unet", action="store_true", help="Store unet weights in fp8_e5m2.") +fpunet_group.add_argument("--fp8_e8m0fnu-unet", action="store_true", help="Store unet weights in fp8_e8m0fnu.") fpvae_group = parser.add_mutually_exclusive_group() fpvae_group.add_argument("--fp16-vae", action="store_true", help="Run the VAE in fp16, might cause black images.") diff --git a/comfy/model_management.py b/comfy/model_management.py index 19e6c8dff..43e402243 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -725,6 +725,8 @@ def unet_dtype(device=None, model_params=0, supported_dtypes=[torch.float16, tor return torch.float8_e4m3fn if args.fp8_e5m2_unet: return torch.float8_e5m2 + if args.fp8_e8m0fnu_unet: + return torch.float8_e8m0fnu fp8_dtype = None if weight_dtype in FLOAT8_TYPES: From 92cdc692f47188e6e4c48c5666ac802281240a37 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Tue, 22 Apr 2025 22:57:17 +0100 Subject: [PATCH 12/23] Replace aom-av1 with svt-av1 for saving webm videos, use preset 6 + yuv420p10le pixel format (#7736) * Add support for saving svt-av1 webm videos & yuv420p10le pixel format * Replace aom-av1 with svt-av1 Use yuv420p10le for av1 --- comfy_extras/nodes_video.py | 6 ++++-- requirements.txt | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/comfy_extras/nodes_video.py b/comfy_extras/nodes_video.py index 97ca513d8..a9e244ebe 100644 
--- a/comfy_extras/nodes_video.py +++ b/comfy_extras/nodes_video.py @@ -50,13 +50,15 @@ class SaveWEBM: for x in extra_pnginfo: container.metadata[x] = json.dumps(extra_pnginfo[x]) - codec_map = {"vp9": "libvpx-vp9", "av1": "libaom-av1"} + codec_map = {"vp9": "libvpx-vp9", "av1": "libsvtav1"} stream = container.add_stream(codec_map[codec], rate=Fraction(round(fps * 1000), 1000)) stream.width = images.shape[-2] stream.height = images.shape[-3] - stream.pix_fmt = "yuv420p" + stream.pix_fmt = "yuv420p10le" if codec == "av1" else "yuv420p" stream.bit_rate = 0 stream.options = {'crf': str(crf)} + if codec == "av1": + stream.options["preset"] = "6" for frame in images: frame = av.VideoFrame.from_ndarray(torch.clamp(frame[..., :3] * 255, min=0, max=255).to(device=torch.device("cpu"), dtype=torch.uint8).numpy(), format="rgb24") diff --git a/requirements.txt b/requirements.txt index 5c3a854ce..90eb04612 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,4 +22,4 @@ psutil kornia>=0.7.1 spandrel soundfile -av +av>=14.1.0 From 0738e4ea5dd5ecac34d8cf61bb381ea6d159394b Mon Sep 17 00:00:00 2001 From: Robin Huang Date: Tue, 22 Apr 2025 23:18:08 -0700 Subject: [PATCH 13/23] [API nodes] Add backbone for supporting api nodes in ComfyUI (#7745) * Add Ideogram generate node. * Add staging api. * COMFY_API_NODE_NAME node property * switch to boolean flag and use original node name for id * add optional to type * Add API_NODE and common error for missing auth token (#5) * Add Minimax Video Generation + Async Task queue polling example (#6) * [Minimax] Show video preview and embed workflow in ouput (#7) * [API Nodes] Send empty request body instead of empty dictionary. (#8) * Fixed: removed function from rebase. * Add pydantic. * Remove uv.lock * Remove polling operations. * Update stubs workflow. * Remove polling comments. * Update stubs. * Use pydantic v2. * Use pydantic v2. * Add basic OpenAITextToImage node * Add. * convert image to tensor. * Improve types. * Ruff. * Push tests. * Handle multi-form data. - Don't set content-type for multi-part/form - Use data field instead of JSON * Change to api.comfy.org * Handle error code 409. * Remove nodes. 
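The multi-part/form handling called out above comes down to two things in the request helper: drop any preset Content-Type header so `requests` can generate the multipart boundary itself, and send the remaining fields through `data` instead of `json`. A minimal sketch of that behaviour, assuming a generic endpoint and placeholder field names (not part of this PR):

    import requests

    def post_with_files(url, fields, files, headers, timeout=30.0):
        # Let requests set the multipart Content-Type (with boundary) itself.
        headers = {k: v for k, v in headers.items() if k.lower() != "content-type"}
        # Form fields travel as `data`; using `json=` here would conflict
        # with the multipart encoding of `files`.
        return requests.post(url, data=fields, files=files, headers=headers, timeout=timeout)
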
--------- Co-authored-by: bymyself Co-authored-by: Yoland Y <4950057+yoland68@users.noreply.github.com> --- comfy/comfy_types/node_typing.py | 4 +- comfy_api_nodes/__init__.py | 0 comfy_api_nodes/apis/client.py | 337 +++++++++++++++++++++++++++++++ requirements.txt | 1 + server.py | 3 + 5 files changed, 344 insertions(+), 1 deletion(-) create mode 100644 comfy_api_nodes/__init__.py create mode 100644 comfy_api_nodes/apis/client.py diff --git a/comfy/comfy_types/node_typing.py b/comfy/comfy_types/node_typing.py index a348791a9..0bdda032e 100644 --- a/comfy/comfy_types/node_typing.py +++ b/comfy/comfy_types/node_typing.py @@ -1,7 +1,7 @@ """Comfy-specific type hinting""" from __future__ import annotations -from typing import Literal, TypedDict +from typing import Literal, TypedDict, Optional from typing_extensions import NotRequired from abc import ABC, abstractmethod from enum import Enum @@ -229,6 +229,8 @@ class ComfyNodeABC(ABC): """Flags a node as experimental, informing users that it may change or not work as expected.""" DEPRECATED: bool """Flags a node as deprecated, indicating to users that they should find alternatives to this node.""" + API_NODE: Optional[bool] + """Flags a node as an API node.""" @classmethod @abstractmethod diff --git a/comfy_api_nodes/__init__.py b/comfy_api_nodes/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/comfy_api_nodes/apis/client.py b/comfy_api_nodes/apis/client.py new file mode 100644 index 000000000..cd81d5a1d --- /dev/null +++ b/comfy_api_nodes/apis/client.py @@ -0,0 +1,337 @@ +import logging + +""" +API Client Framework for api.comfy.org. + +This module provides a flexible framework for making API requests from ComfyUI nodes. +It supports both synchronous and asynchronous API operations with proper type validation. + +Key Components: +-------------- +1. ApiClient - Handles HTTP requests with authentication and error handling +2. ApiEndpoint - Defines a single HTTP endpoint with its request/response models +3. ApiOperation - Executes a single synchronous API operation + +Usage Examples: +-------------- + +# Example 1: Synchronous API Operation +# ------------------------------------ +# For a simple API call that returns the result immediately: + +# 1. Create the API client +api_client = ApiClient( + base_url="https://api.example.com", + api_key="your_api_key_here", + timeout=30.0, + verify_ssl=True +) + +# 2. Define the endpoint +user_info_endpoint = ApiEndpoint( + path="/v1/users/me", + method=HttpMethod.GET, + request_model=EmptyRequest, # No request body needed + response_model=UserProfile, # Pydantic model for the response + query_params=None +) + +# 3. Create the request object +request = EmptyRequest() + +# 4. Create and execute the operation +operation = ApiOperation( + endpoint=user_info_endpoint, + request=request +) +user_profile = operation.execute(client=api_client) # Returns immediately with the result + +""" + +from typing import ( + Dict, + Type, + Optional, + Any, + TypeVar, + Generic, +) +from pydantic import BaseModel +from enum import Enum +import json +import requests +from urllib.parse import urljoin + +T = TypeVar("T", bound=BaseModel) +R = TypeVar("R", bound=BaseModel) + +class EmptyRequest(BaseModel): + """Base class for empty request bodies. 
+ For GET requests, fields will be sent as query parameters.""" + + pass + + +class HttpMethod(str, Enum): + GET = "GET" + POST = "POST" + PUT = "PUT" + DELETE = "DELETE" + PATCH = "PATCH" + + +class ApiClient: + """ + Client for making HTTP requests to an API with authentication and error handling. + """ + + def __init__( + self, + base_url: str, + api_key: Optional[str] = None, + timeout: float = 30.0, + verify_ssl: bool = True, + ): + self.base_url = base_url + self.api_key = api_key + self.timeout = timeout + self.verify_ssl = verify_ssl + + def get_headers(self) -> Dict[str, str]: + """Get headers for API requests, including authentication if available""" + headers = {"Content-Type": "application/json", "Accept": "application/json"} + + if self.api_key: + headers["Authorization"] = f"Bearer {self.api_key}" + + return headers + + def request( + self, + method: str, + path: str, + params: Optional[Dict[str, Any]] = None, + json: Optional[Dict[str, Any]] = None, + files: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None, + ) -> Dict[str, Any]: + """ + Make an HTTP request to the API + + Args: + method: HTTP method (GET, POST, etc.) + path: API endpoint path (will be joined with base_url) + params: Query parameters + json: JSON body data + files: Files to upload + headers: Additional headers + + Returns: + Parsed JSON response + + Raises: + requests.RequestException: If the request fails + """ + url = urljoin(self.base_url, path) + self.check_auth_token(self.api_key) + # Combine default headers with any provided headers + request_headers = self.get_headers() + if headers: + request_headers.update(headers) + + # Let requests handle the content type when files are present. + if files: + del request_headers["Content-Type"] + + logging.debug(f"[DEBUG] Request Headers: {request_headers}") + logging.debug(f"[DEBUG] Files: {files}") + logging.debug(f"[DEBUG] Params: {params}") + logging.debug(f"[DEBUG] Json: {json}") + + try: + # If files are present, use data parameter instead of json + if files: + form_data = {} + if json: + form_data.update(json) + response = requests.request( + method=method, + url=url, + params=params, + data=form_data, # Use data instead of json + files=files, + headers=request_headers, + timeout=self.timeout, + verify=self.verify_ssl, + ) + else: + response = requests.request( + method=method, + url=url, + params=params, + json=json, + headers=request_headers, + timeout=self.timeout, + verify=self.verify_ssl, + ) + + # Raise exception for error status codes + response.raise_for_status() + except requests.ConnectionError: + raise Exception( + f"Unable to connect to the API server at {self.base_url}. Please check your internet connection or verify the service is available." + ) + + except requests.Timeout: + raise Exception( + f"Request timed out after {self.timeout} seconds. The server might be experiencing high load or the operation is taking longer than expected." 
+ ) + + except requests.HTTPError as e: + status_code = e.response.status_code if hasattr(e, "response") else None + error_message = f"HTTP Error: {str(e)}" + + # Try to extract detailed error message from JSON response + try: + if hasattr(e, "response") and e.response.content: + error_json = e.response.json() + if "error" in error_json and "message" in error_json["error"]: + error_message = f"API Error: {error_json['error']['message']}" + if "type" in error_json["error"]: + error_message += f" (Type: {error_json['error']['type']})" + else: + error_message = f"API Error: {error_json}" + except Exception as json_error: + # If we can't parse the JSON, fall back to the original error message + logging.debug(f"[DEBUG] Failed to parse error response: {str(json_error)}") + + logging.debug(f"[DEBUG] API Error: {error_message} (Status: {status_code})") + if hasattr(e, "response") and e.response.content: + logging.debug(f"[DEBUG] Response content: {e.response.content}") + if status_code == 401: + error_message = "Unauthorized: Please login first to use this node." + if status_code == 402: + error_message = "Payment Required: Please add credits to your account to use this node." + if status_code == 409: + error_message = "There is a problem with your account. Please contact support@comfy.org. " + if status_code == 429: + error_message = "Rate Limit Exceeded: Please try again later." + raise Exception(error_message) + + # Parse and return JSON response + if response.content: + return response.json() + return {} + + def check_auth_token(self, auth_token): + """Verify that an auth token is present.""" + if auth_token is None: + raise Exception("Please login first to use this node.") + return auth_token + + +class ApiEndpoint(Generic[T, R]): + """Defines an API endpoint with its request and response types""" + + def __init__( + self, + path: str, + method: HttpMethod, + request_model: Type[T], + response_model: Type[R], + query_params: Optional[Dict[str, Any]] = None, + ): + """Initialize an API endpoint definition. + + Args: + path: The URL path for this endpoint, can include placeholders like {id} + method: The HTTP method to use (GET, POST, etc.) + request_model: Pydantic model class that defines the structure and validation rules for API requests to this endpoint + response_model: Pydantic model class that defines the structure and validation rules for API responses from this endpoint + query_params: Optional dictionary of query parameters to include in the request + """ + self.path = path + self.method = method + self.request_model = request_model + self.response_model = response_model + self.query_params = query_params or {} + + +class SynchronousOperation(Generic[T, R]): + """ + Represents a single synchronous API operation. 
+ """ + + def __init__( + self, + endpoint: ApiEndpoint[T, R], + request: T, + files: Optional[Dict[str, Any]] = None, + api_base: str = "https://api.comfy.org", + auth_token: Optional[str] = None, + timeout: float = 60.0, + verify_ssl: bool = True, + ): + self.endpoint = endpoint + self.request = request + self.response = None + self.error = None + self.api_base = api_base + self.auth_token = auth_token + self.timeout = timeout + self.verify_ssl = verify_ssl + self.files = files + def execute(self, client: Optional[ApiClient] = None) -> R: + """Execute the API operation using the provided client or create one""" + try: + # Create client if not provided + if client is None: + if self.api_base is None: + raise ValueError("Either client or api_base must be provided") + client = ApiClient( + base_url=self.api_base, + api_key=self.auth_token, + timeout=self.timeout, + verify_ssl=self.verify_ssl, + ) + + # Convert request model to dict, but use None for EmptyRequest + request_dict = None if isinstance(self.request, EmptyRequest) else self.request.model_dump(exclude_none=True) + + # Debug log for request + logging.debug(f"[DEBUG] API Request: {self.endpoint.method.value} {self.endpoint.path}") + logging.debug(f"[DEBUG] Request Data: {json.dumps(request_dict, indent=2)}") + logging.debug(f"[DEBUG] Query Params: {self.endpoint.query_params}") + + # Make the request + resp = client.request( + method=self.endpoint.method.value, + path=self.endpoint.path, + json=request_dict, + params=self.endpoint.query_params, + files=self.files, + ) + + # Debug log for response + logging.debug("=" * 50) + logging.debug("[DEBUG] RESPONSE DETAILS:") + logging.debug("[DEBUG] Status Code: 200 (Success)") + logging.debug(f"[DEBUG] Response Body: {json.dumps(resp, indent=2)}") + logging.debug("=" * 50) + + # Parse and return the response + return self._parse_response(resp) + + except Exception as e: + logging.debug(f"[DEBUG] API Exception: {str(e)}") + raise Exception(str(e)) + + def _parse_response(self, resp): + """Parse response data - can be overridden by subclasses""" + # The response is already the complete object, don't extract just the "data" field + # as that would lose the outer structure (created timestamp, etc.) + + # Parse response using the provided model + self.response = self.endpoint.response_model.model_validate(resp) + logging.debug(f"[DEBUG] Parsed Response: {self.response}") + return self.response diff --git a/requirements.txt b/requirements.txt index 90eb04612..f8ad908ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,3 +23,4 @@ kornia>=0.7.1 spandrel soundfile av>=14.1.0 +pydantic~=2.0 diff --git a/server.py b/server.py index 0cc97b248..f64ec27d4 100644 --- a/server.py +++ b/server.py @@ -580,6 +580,9 @@ class PromptServer(): info['deprecated'] = True if getattr(obj_class, "EXPERIMENTAL", False): info['experimental'] = True + + if hasattr(obj_class, 'API_NODE'): + info['api_node'] = obj_class.API_NODE return info @routes.get("/object_info") From 552615235dc043f0b07d11e1ff2e6571e6f90d4d Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 23 Apr 2025 01:12:52 -0700 Subject: [PATCH 14/23] Fix for dino lowvram. 
(#7748) --- comfy/image_encoders/dino2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/image_encoders/dino2.py b/comfy/image_encoders/dino2.py index 130ed6fd7..976f98c65 100644 --- a/comfy/image_encoders/dino2.py +++ b/comfy/image_encoders/dino2.py @@ -116,7 +116,7 @@ class Dino2Embeddings(torch.nn.Module): def forward(self, pixel_values): x = self.patch_embeddings(pixel_values) # TODO: mask_token? - x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1) + x = torch.cat((self.cls_token.to(device=x.device, dtype=x.dtype).expand(x.shape[0], -1, -1), x), dim=1) x = x + comfy.model_management.cast_to_device(self.position_embeddings, x.device, x.dtype) return x From 21a11ef817e3749047c6b548231210ff84fe331d Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 23 Apr 2025 02:12:59 -0700 Subject: [PATCH 15/23] Pytorch stable 2.7 is out and support cu128 (#7749) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cf6df7e55..62800bb4f 100644 --- a/README.md +++ b/README.md @@ -216,9 +216,9 @@ Additional discussion and help can be found [here](https://github.com/comfyanony Nvidia users should install stable pytorch using this command: -```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu126``` +```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu128``` -This is the command to install pytorch nightly instead which supports the new blackwell 50xx series GPUs and might have performance improvements. +This is the command to install pytorch nightly instead which might have performance improvements. ```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128``` From 7eaff81be106fa5e1479cfa69f5fd06265611f2e Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 23 Apr 2025 02:28:24 -0700 Subject: [PATCH 16/23] fp16 accumulation can now be enabled on the stable package. (#7750) --- .../run_nvidia_gpu_fast_fp16_accumulation.bat | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .ci/{windows_nightly_base_files => windows_base_files}/run_nvidia_gpu_fast_fp16_accumulation.bat (100%) diff --git a/.ci/windows_nightly_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat b/.ci/windows_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat similarity index 100% rename from .ci/windows_nightly_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat rename to .ci/windows_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat From 3eaad0590e51bc186b1d533fef906e3f296cdd42 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 23 Apr 2025 02:47:09 -0700 Subject: [PATCH 17/23] Lower size of release package. (#7751) --- .github/workflows/windows_release_package.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/windows_release_package.yml b/.github/workflows/windows_release_package.yml index 416544f71..8300c2faf 100644 --- a/.github/workflows/windows_release_package.yml +++ b/.github/workflows/windows_release_package.yml @@ -50,7 +50,7 @@ jobs: - uses: actions/checkout@v4 with: - fetch-depth: 0 + fetch-depth: 150 persist-credentials: false - shell: bash run: | @@ -82,7 +82,7 @@ jobs: cd .. 
- "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=8 -mfb=64 -md=32m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable + "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=256m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable mv ComfyUI_windows_portable.7z ComfyUI/new_ComfyUI_windows_portable_nvidia_cu${{ inputs.cu }}_or_cpu.7z cd ComfyUI_windows_portable From 154f2911aaf0333db576a237c6098ed0a8160a7d Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 23 Apr 2025 03:33:09 -0700 Subject: [PATCH 18/23] Lower size of release package more. (#7754) --- .github/workflows/stable-release.yml | 6 +++--- .github/workflows/windows_release_nightly_pytorch.yml | 2 +- .github/workflows/windows_release_package.yml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/stable-release.yml b/.github/workflows/stable-release.yml index f7d30a9a4..40df7ab88 100644 --- a/.github/workflows/stable-release.yml +++ b/.github/workflows/stable-release.yml @@ -36,7 +36,7 @@ jobs: - uses: actions/checkout@v4 with: ref: ${{ inputs.git_tag }} - fetch-depth: 0 + fetch-depth: 150 persist-credentials: false - uses: actions/cache/restore@v4 id: cache @@ -70,7 +70,7 @@ jobs: cd .. git clone --depth 1 https://github.com/comfyanonymous/taesd - cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/ + cp taesd/*.safetensors ./ComfyUI_copy/models/vae_approx/ mkdir ComfyUI_windows_portable mv python_embeded ComfyUI_windows_portable @@ -85,7 +85,7 @@ jobs: cd .. - "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=8 -mfb=64 -md=32m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable + "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=512m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable mv ComfyUI_windows_portable.7z ComfyUI/ComfyUI_windows_portable_nvidia.7z cd ComfyUI_windows_portable diff --git a/.github/workflows/windows_release_nightly_pytorch.yml b/.github/workflows/windows_release_nightly_pytorch.yml index 24599249a..eb5ed9c91 100644 --- a/.github/workflows/windows_release_nightly_pytorch.yml +++ b/.github/workflows/windows_release_nightly_pytorch.yml @@ -56,7 +56,7 @@ jobs: cd .. git clone --depth 1 https://github.com/comfyanonymous/taesd - cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/ + cp taesd/*.safetensors ./ComfyUI_copy/models/vae_approx/ mkdir ComfyUI_windows_portable_nightly_pytorch mv python_embeded ComfyUI_windows_portable_nightly_pytorch diff --git a/.github/workflows/windows_release_package.yml b/.github/workflows/windows_release_package.yml index 8300c2faf..dc79b1f4a 100644 --- a/.github/workflows/windows_release_package.yml +++ b/.github/workflows/windows_release_package.yml @@ -67,7 +67,7 @@ jobs: cd .. git clone --depth 1 https://github.com/comfyanonymous/taesd - cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/ + cp taesd/*.safetensors ./ComfyUI_copy/models/vae_approx/ mkdir ComfyUI_windows_portable mv python_embeded ComfyUI_windows_portable @@ -82,7 +82,7 @@ jobs: cd .. 
- "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=256m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable + "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=512m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable mv ComfyUI_windows_portable.7z ComfyUI/new_ComfyUI_windows_portable_nvidia_cu${{ inputs.cu }}_or_cpu.7z cd ComfyUI_windows_portable From dea1c7474a8e663732a755204970e09006df68c7 Mon Sep 17 00:00:00 2001 From: Robin Huang Date: Wed, 23 Apr 2025 12:38:34 -0700 Subject: [PATCH 19/23] Add support for API Nodes in ComfyUI. (#7726) * Add Ideogram generate node. * Add staging api. * COMFY_API_NODE_NAME node property * switch to boolean flag and use original node name for id * add optional to type * Add API_NODE and common error for missing auth token (#5) * Add Minimax Video Generation + Async Task queue polling example (#6) * [Minimax] Show video preview and embed workflow in ouput (#7) * [API Nodes] Send empty request body instead of empty dictionary. (#8) * Fixed: removed function from rebase. * Add pydantic. * Remove uv.lock * Remove polling operations. * Update stubs workflow. * Remove polling comments. * Update stubs. * Use pydantic v2. * Use pydantic v2. * Add basic OpenAITextToImage node * Add. * convert image to tensor. * Improve types. * Ruff. * Push tests. * Handle multi-form data. - Don't set content-type for multi-part/form - Use data field instead of JSON * Change to api.comfy.org * Handle error code 409. * separate out nodes per openai model * Update error message. * fix wrong output type * re-categorize nodes, remove ideogram (for now) * oops, fix mappings * fix ruff * Update frontend to 1.17.9 * embargo lift rename nodes * remove unused autogenerated model code * fix API type error and add b64 support for 4o * fix ruff * oops forgot mask scaling code * Remove unused types. 
--------- Co-authored-by: bymyself Co-authored-by: Yoland Y <4950057+yoland68@users.noreply.github.com> Co-authored-by: thot-experiment --- .github/workflows/update-api-stubs.yml | 47 +++ comfy_api_nodes/apis/PixverseController.py | 17 + comfy_api_nodes/apis/PixverseDto.py | 57 +++ comfy_api_nodes/apis/__init__.py | 422 ++++++++++++++++++++ comfy_api_nodes/apis/client.py | 2 +- comfy_api_nodes/nodes_api.py | 425 +++++++++++++++++++++ nodes.py | 9 + requirements.txt | 2 +- 8 files changed, 979 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/update-api-stubs.yml create mode 100644 comfy_api_nodes/apis/PixverseController.py create mode 100644 comfy_api_nodes/apis/PixverseDto.py create mode 100644 comfy_api_nodes/apis/__init__.py create mode 100644 comfy_api_nodes/nodes_api.py diff --git a/.github/workflows/update-api-stubs.yml b/.github/workflows/update-api-stubs.yml new file mode 100644 index 000000000..2ae99b673 --- /dev/null +++ b/.github/workflows/update-api-stubs.yml @@ -0,0 +1,47 @@ +name: Generate Pydantic Stubs from api.comfy.org + +on: + schedule: + - cron: '0 0 * * 1' + workflow_dispatch: + +jobs: + generate-models: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install 'datamodel-code-generator[http]' + + - name: Generate API models + run: | + datamodel-codegen --use-subclass-enum --url https://api.comfy.org/openapi --output comfy_api_nodes/apis --output-model-type pydantic_v2.BaseModel + + - name: Check for changes + id: git-check + run: | + git diff --exit-code comfy_api_nodes/apis || echo "changes=true" >> $GITHUB_OUTPUT + + - name: Create Pull Request + if: steps.git-check.outputs.changes == 'true' + uses: peter-evans/create-pull-request@v5 + with: + commit-message: 'chore: update API models from OpenAPI spec' + title: 'Update API models from api.comfy.org' + body: | + This PR updates the API models based on the latest api.comfy.org OpenAPI specification. + + Generated automatically by the a Github workflow. + branch: update-api-stubs + delete-branch: true + base: main diff --git a/comfy_api_nodes/apis/PixverseController.py b/comfy_api_nodes/apis/PixverseController.py new file mode 100644 index 000000000..29a3ab33b --- /dev/null +++ b/comfy_api_nodes/apis/PixverseController.py @@ -0,0 +1,17 @@ +# generated by datamodel-codegen: +# filename: https://api.comfy.org/openapi +# timestamp: 2025-04-23T15:56:33+00:00 + +from __future__ import annotations + +from typing import Optional + +from pydantic import BaseModel + +from . 
import PixverseDto + + +class ResponseData(BaseModel): + ErrCode: Optional[int] = None + ErrMsg: Optional[str] = None + Resp: Optional[PixverseDto.V2OpenAPII2VResp] = None diff --git a/comfy_api_nodes/apis/PixverseDto.py b/comfy_api_nodes/apis/PixverseDto.py new file mode 100644 index 000000000..399512214 --- /dev/null +++ b/comfy_api_nodes/apis/PixverseDto.py @@ -0,0 +1,57 @@ +# generated by datamodel-codegen: +# filename: https://api.comfy.org/openapi +# timestamp: 2025-04-23T15:56:33+00:00 + +from __future__ import annotations + +from typing import Optional + +from pydantic import BaseModel, Field, constr + + +class V2OpenAPII2VResp(BaseModel): + video_id: Optional[int] = Field(None, description='Video_id') + + +class V2OpenAPIT2VReq(BaseModel): + aspect_ratio: str = Field( + ..., description='Aspect ratio (16:9, 4:3, 1:1, 3:4, 9:16)', examples=['16:9'] + ) + duration: int = Field( + ..., + description='Video duration (5, 8 seconds, --model=v3.5 only allows 5,8; --quality=1080p does not support 8s)', + examples=[5], + ) + model: str = Field( + ..., description='Model version (only supports v3.5)', examples=['v3.5'] + ) + motion_mode: Optional[str] = Field( + 'normal', + description='Motion mode (normal, fast, --fast only available when duration=5; --quality=1080p does not support fast)', + examples=['normal'], + ) + negative_prompt: Optional[constr(max_length=2048)] = Field( + None, description='Negative prompt\n' + ) + prompt: constr(max_length=2048) = Field(..., description='Prompt') + quality: str = Field( + ..., + description='Video quality ("360p"(Turbo model), "540p", "720p", "1080p")', + examples=['540p'], + ) + seed: Optional[int] = Field(None, description='Random seed, range: 0 - 2147483647') + style: Optional[str] = Field( + None, + description='Style (effective when model=v3.5, "anime", "3d_animation", "clay", "comic", "cyberpunk") Do not include style parameter unless needed', + examples=['anime'], + ) + template_id: Optional[int] = Field( + None, + description='Template ID (template_id must be activated before use)', + examples=[302325299692608], + ) + water_mark: Optional[bool] = Field( + False, + description='Watermark (true: add watermark, false: no watermark)', + examples=[False], + ) diff --git a/comfy_api_nodes/apis/__init__.py b/comfy_api_nodes/apis/__init__.py new file mode 100644 index 000000000..e7ea9b332 --- /dev/null +++ b/comfy_api_nodes/apis/__init__.py @@ -0,0 +1,422 @@ +# generated by datamodel-codegen: +# filename: https://api.comfy.org/openapi +# timestamp: 2025-04-23T15:56:33+00:00 + +from __future__ import annotations + +from datetime import datetime +from enum import Enum +from typing import Any, Dict, List, Optional + +from pydantic import AnyUrl, BaseModel, Field, confloat, conint + +class Customer(BaseModel): + createdAt: Optional[datetime] = Field( + None, description='The date and time the user was created' + ) + email: Optional[str] = Field(None, description='The email address for this user') + id: str = Field(..., description='The firebase UID of the user') + name: Optional[str] = Field(None, description='The name for this user') + updatedAt: Optional[datetime] = Field( + None, description='The date and time the user was last updated' + ) + + +class Error(BaseModel): + details: Optional[List[str]] = Field( + None, + description='Optional detailed information about the error or hints for resolving it.', + ) + message: Optional[str] = Field( + None, description='A clear and concise description of the error.' 
+ ) + + +class ErrorResponse(BaseModel): + error: str + message: str + +class ImageRequest(BaseModel): + aspect_ratio: Optional[str] = Field( + None, + description="Optional. The aspect ratio (e.g., 'ASPECT_16_9', 'ASPECT_1_1'). Cannot be used with resolution. Defaults to 'ASPECT_1_1' if unspecified.", + ) + color_palette: Optional[Dict[str, Any]] = Field( + None, description='Optional. Color palette object. Only for V_2, V_2_TURBO.' + ) + magic_prompt_option: Optional[str] = Field( + None, description="Optional. MagicPrompt usage ('AUTO', 'ON', 'OFF')." + ) + model: str = Field(..., description="The model used (e.g., 'V_2', 'V_2A_TURBO')") + negative_prompt: Optional[str] = Field( + None, + description='Optional. Description of what to exclude. Only for V_1, V_1_TURBO, V_2, V_2_TURBO.', + ) + num_images: Optional[conint(ge=1, le=8)] = Field( + 1, description='Optional. Number of images to generate (1-8). Defaults to 1.' + ) + prompt: str = Field( + ..., description='Required. The prompt to use to generate the image.' + ) + resolution: Optional[str] = Field( + None, + description="Optional. Resolution (e.g., 'RESOLUTION_1024_1024'). Only for model V_2. Cannot be used with aspect_ratio.", + ) + seed: Optional[conint(ge=0, le=2147483647)] = Field( + None, description='Optional. A number between 0 and 2147483647.' + ) + style_type: Optional[str] = Field( + None, + description="Optional. Style type ('AUTO', 'GENERAL', 'REALISTIC', 'DESIGN', 'RENDER_3D', 'ANIME'). Only for models V_2 and above.", + ) + + +class Datum(BaseModel): + is_image_safe: Optional[bool] = Field( + None, description='Indicates whether the image is considered safe.' + ) + prompt: Optional[str] = Field( + None, description='The prompt used to generate this image.' + ) + resolution: Optional[str] = Field( + None, description="The resolution of the generated image (e.g., '1024x1024')." + ) + seed: Optional[int] = Field( + None, description='The seed value used for this generation.' 
+ ) + style_type: Optional[str] = Field( + None, + description="The style type used for generation (e.g., 'REALISTIC', 'ANIME').", + ) + url: Optional[str] = Field(None, description='URL to the generated image.') + + +class Code(Enum): + int_1100 = 1100 + int_1101 = 1101 + int_1102 = 1102 + int_1103 = 1103 + + +class Code1(Enum): + int_1000 = 1000 + int_1001 = 1001 + int_1002 = 1002 + int_1003 = 1003 + int_1004 = 1004 + + +class AspectRatio(str, Enum): + field_16_9 = '16:9' + field_9_16 = '9:16' + field_1_1 = '1:1' + + +class Config(BaseModel): + horizontal: Optional[confloat(ge=-10.0, le=10.0)] = None + pan: Optional[confloat(ge=-10.0, le=10.0)] = None + roll: Optional[confloat(ge=-10.0, le=10.0)] = None + tilt: Optional[confloat(ge=-10.0, le=10.0)] = None + vertical: Optional[confloat(ge=-10.0, le=10.0)] = None + zoom: Optional[confloat(ge=-10.0, le=10.0)] = None + + +class Type(str, Enum): + simple = 'simple' + down_back = 'down_back' + forward_up = 'forward_up' + right_turn_forward = 'right_turn_forward' + left_turn_forward = 'left_turn_forward' + + +class CameraControl(BaseModel): + config: Optional[Config] = None + type: Optional[Type] = Field(None, description='Predefined camera movements type') + + +class Duration(str, Enum): + field_5 = 5 + field_10 = 10 + + +class Mode(str, Enum): + std = 'std' + pro = 'pro' + + +class TaskInfo(BaseModel): + external_task_id: Optional[str] = None + + +class Video(BaseModel): + duration: Optional[str] = Field(None, description='Total video duration') + id: Optional[str] = Field(None, description='Generated video ID') + url: Optional[AnyUrl] = Field(None, description='URL for generated video') + + +class TaskResult(BaseModel): + videos: Optional[List[Video]] = None + + +class TaskStatus(str, Enum): + submitted = 'submitted' + processing = 'processing' + succeed = 'succeed' + failed = 'failed' + + +class Data(BaseModel): + created_at: Optional[int] = Field(None, description='Task creation time') + task_id: Optional[str] = Field(None, description='Task ID') + task_info: Optional[TaskInfo] = None + task_result: Optional[TaskResult] = None + task_status: Optional[TaskStatus] = None + updated_at: Optional[int] = Field(None, description='Task update time') + + +class AspectRatio1(str, Enum): + field_16_9 = '16:9' + field_9_16 = '9:16' + field_1_1 = '1:1' + field_4_3 = '4:3' + field_3_4 = '3:4' + field_3_2 = '3:2' + field_2_3 = '2:3' + field_21_9 = '21:9' + + +class ImageReference(str, Enum): + subject = 'subject' + face = 'face' + + +class Image(BaseModel): + index: Optional[int] = Field(None, description='Image Number (0-9)') + url: Optional[AnyUrl] = Field(None, description='URL for generated image') + + +class TaskResult1(BaseModel): + images: Optional[List[Image]] = None + + +class Data1(BaseModel): + created_at: Optional[int] = Field(None, description='Task creation time') + task_id: Optional[str] = Field(None, description='Task ID') + task_result: Optional[TaskResult1] = None + task_status: Optional[TaskStatus] = None + task_status_msg: Optional[str] = Field(None, description='Task status information') + updated_at: Optional[int] = Field(None, description='Task update time') + + +class AspectRatio2(str, Enum): + field_16_9 = '16:9' + field_9_16 = '9:16' + field_1_1 = '1:1' + + +class CameraControl1(BaseModel): + config: Optional[Config] = None + type: Optional[Type] = Field(None, description='Predefined camera movements type') + + +class ModelName2(str, Enum): + kling_v1 = 'kling-v1' + kling_v1_6 = 'kling-v1-6' + + +class TaskResult2(BaseModel): + 
videos: Optional[List[Video]] = None + + +class Data2(BaseModel): + created_at: Optional[int] = Field(None, description='Task creation time') + task_id: Optional[str] = Field(None, description='Task ID') + task_info: Optional[TaskInfo] = None + task_result: Optional[TaskResult2] = None + task_status: Optional[TaskStatus] = None + updated_at: Optional[int] = Field(None, description='Task update time') + + +class Code2(Enum): + int_1200 = 1200 + int_1201 = 1201 + int_1202 = 1202 + int_1203 = 1203 + + +class ResourcePackType(str, Enum): + decreasing_total = 'decreasing_total' + constant_period = 'constant_period' + + +class Status(str, Enum): + toBeOnline = 'toBeOnline' + online = 'online' + expired = 'expired' + runOut = 'runOut' + + +class ResourcePackSubscribeInfo(BaseModel): + effective_time: Optional[int] = Field( + None, description='Effective time, Unix timestamp in ms' + ) + invalid_time: Optional[int] = Field( + None, description='Expiration time, Unix timestamp in ms' + ) + purchase_time: Optional[int] = Field( + None, description='Purchase time, Unix timestamp in ms' + ) + remaining_quantity: Optional[float] = Field( + None, description='Remaining quantity (updated with a 12-hour delay)' + ) + resource_pack_id: Optional[str] = Field(None, description='Resource package ID') + resource_pack_name: Optional[str] = Field(None, description='Resource package name') + resource_pack_type: Optional[ResourcePackType] = Field( + None, + description='Resource package type (decreasing_total=decreasing total, constant_period=constant periodicity)', + ) + status: Optional[Status] = Field(None, description='Resource Package Status') + total_quantity: Optional[float] = Field(None, description='Total quantity') + +class Background(str, Enum): + transparent = 'transparent' + opaque = 'opaque' + + +class Moderation(str, Enum): + low = 'low' + auto = 'auto' + + +class OutputFormat(str, Enum): + png = 'png' + webp = 'webp' + jpeg = 'jpeg' + + +class Quality(str, Enum): + low = 'low' + medium = 'medium' + high = 'high' + + +class OpenAIImageEditRequest(BaseModel): + background: Optional[str] = Field( + None, description='Background transparency', examples=['opaque'] + ) + model: str = Field( + ..., description='The model to use for image editing', examples=['gpt-image-1'] + ) + moderation: Optional[Moderation] = Field( + None, description='Content moderation setting', examples=['auto'] + ) + n: Optional[int] = Field( + None, description='The number of images to generate', examples=[1] + ) + output_compression: Optional[int] = Field( + None, description='Compression level for JPEG or WebP (0-100)', examples=[100] + ) + output_format: Optional[OutputFormat] = Field( + None, description='Format of the output image', examples=['png'] + ) + prompt: str = Field( + ..., + description='A text description of the desired edit', + examples=['Give the rocketship rainbow coloring'], + ) + quality: Optional[str] = Field( + None, description='The quality of the edited image', examples=['low'] + ) + size: Optional[str] = Field( + None, description='Size of the output image', examples=['1024x1024'] + ) + user: Optional[str] = Field( + None, + description='A unique identifier for end-user monitoring', + examples=['user-1234'], + ) + + +class Quality1(str, Enum): + low = 'low' + medium = 'medium' + high = 'high' + standard = 'standard' + hd = 'hd' + + +class ResponseFormat(str, Enum): + url = 'url' + b64_json = 'b64_json' + + +class Style(str, Enum): + vivid = 'vivid' + natural = 'natural' + + +class 
OpenAIImageGenerationRequest(BaseModel): + background: Optional[Background] = Field( + None, description='Background transparency', examples=['opaque'] + ) + model: Optional[str] = Field( + None, description='The model to use for image generation', examples=['dall-e-3'] + ) + moderation: Optional[Moderation] = Field( + None, description='Content moderation setting', examples=['auto'] + ) + n: Optional[int] = Field( + None, + description='The number of images to generate (1-10). Only 1 supported for dall-e-3.', + examples=[1], + ) + output_compression: Optional[int] = Field( + None, description='Compression level for JPEG or WebP (0-100)', examples=[100] + ) + output_format: Optional[OutputFormat] = Field( + None, description='Format of the output image', examples=['png'] + ) + prompt: str = Field( + ..., + description='A text description of the desired image', + examples=['Draw a rocket in front of a blackhole in deep space'], + ) + quality: Optional[Quality1] = Field( + None, description='The quality of the generated image', examples=['high'] + ) + response_format: Optional[ResponseFormat] = Field( + None, description='Response format of image data', examples=['b64_json'] + ) + size: Optional[str] = Field( + None, + description='Size of the image (e.g., 1024x1024, 1536x1024, auto)', + examples=['1024x1536'], + ) + style: Optional[Style] = Field( + None, description='Style of the image (only for dall-e-3)', examples=['vivid'] + ) + user: Optional[str] = Field( + None, + description='A unique identifier for end-user monitoring', + examples=['user-1234'], + ) + + +class Datum1(BaseModel): + b64_json: Optional[str] = Field(None, description='Base64 encoded image data') + revised_prompt: Optional[str] = Field(None, description='Revised prompt') + url: Optional[str] = Field(None, description='URL of the image') + + +class OpenAIImageGenerationResponse(BaseModel): + data: Optional[List[Datum1]] = None +class User(BaseModel): + email: Optional[str] = Field(None, description='The email address for this user.') + id: Optional[str] = Field(None, description='The unique id for this user.') + isAdmin: Optional[bool] = Field( + None, description='Indicates if the user has admin privileges.' + ) + isApproved: Optional[bool] = Field( + None, description='Indicates if the user is approved.' 
+ ) + name: Optional[str] = Field(None, description='The name for this user.') diff --git a/comfy_api_nodes/apis/client.py b/comfy_api_nodes/apis/client.py index cd81d5a1d..9bc3d76d5 100644 --- a/comfy_api_nodes/apis/client.py +++ b/comfy_api_nodes/apis/client.py @@ -226,7 +226,7 @@ class ApiClient: def check_auth_token(self, auth_token): """Verify that an auth token is present.""" if auth_token is None: - raise Exception("Please login first to use this node.") + raise Exception("Unauthorized: Please login first to use this node.") return auth_token diff --git a/comfy_api_nodes/nodes_api.py b/comfy_api_nodes/nodes_api.py new file mode 100644 index 000000000..92f4a0c87 --- /dev/null +++ b/comfy_api_nodes/nodes_api.py @@ -0,0 +1,425 @@ +import io +from inspect import cleandoc + +from comfy.utils import common_upscale +from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeDict +from comfy_api_nodes.apis import ( + OpenAIImageGenerationRequest, + OpenAIImageEditRequest, + OpenAIImageGenerationResponse +) +from comfy_api_nodes.apis.client import ApiEndpoint, HttpMethod, SynchronousOperation + +import numpy as np +from PIL import Image +import requests +import torch +import math +import base64 + +def downscale_input(image): + samples = image.movedim(-1,1) + #downscaling input images to roughly the same size as the outputs + total = int(1536 * 1024) + scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2])) + if scale_by >= 1: + return image + width = round(samples.shape[3] * scale_by) + height = round(samples.shape[2] * scale_by) + + s = common_upscale(samples, width, height, "lanczos", "disabled") + s = s.movedim(1,-1) + return s + +def validate_and_cast_response (response): + # validate raw JSON response + data = response.data + if not data or len(data) == 0: + raise Exception("No images returned from API endpoint") + + # Get base64 image data + image_url = data[0].url + b64_data = data[0].b64_json + if not image_url and not b64_data: + raise Exception("No image was generated in the response") + + if b64_data: + img_data = base64.b64decode(b64_data) + img = Image.open(io.BytesIO(img_data)) + + elif image_url: + img_response = requests.get(image_url) + if img_response.status_code != 200: + raise Exception("Failed to download the image") + img = Image.open(io.BytesIO(img_response.content)) + + img = img.convert("RGB") # Ensure RGB format + + # Convert to numpy array, normalize to float32 between 0 and 1 + img_array = np.array(img).astype(np.float32) / 255.0 + + # Convert to torch tensor and add batch dimension + return torch.from_numpy(img_array)[None,] + +class OpenAIDalle2(ComfyNodeABC): + """ + Generates images synchronously via OpenAI's DALL·E 2 endpoint. + + Uses the proxy at /proxy/openai/images/generations. Returned URLs are short‑lived, + so download or cache results if you need to keep them. 
+ """ + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": { + "prompt": (IO.STRING, { + "multiline": True, + "default": "", + "tooltip": "Text prompt for DALL·E", + }), + }, + "optional": { + "seed": (IO.INT, { + "default": 0, + "min": 0, + "max": 2**31-1, + "step": 1, + "display": "number", + "tooltip": "not implemented yet in backend", + }), + "size": (IO.COMBO, { + "options": ["256x256", "512x512", "1024x1024"], + "default": "1024x1024", + "tooltip": "Image size", + }), + "n": (IO.INT, { + "default": 1, + "min": 1, + "max": 8, + "step": 1, + "display": "number", + "tooltip": "How many images to generate", + }), + "image": (IO.IMAGE, { + "default": None, + "tooltip": "Optional reference image for image editing.", + }), + "mask": (IO.MASK, { + "default": None, + "tooltip": "Optional mask for inpainting (white areas will be replaced)", + }), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG" + } + } + + RETURN_TYPES = (IO.IMAGE,) + FUNCTION = "api_call" + CATEGORY = "api node" + DESCRIPTION = cleandoc(__doc__ or "") + API_NODE = True + + def api_call(self, prompt, seed=0, image=None, mask=None, n=1, size="1024x1024", auth_token=None): + model = "dall-e-2" + path = "/proxy/openai/images/generations" + request_class = OpenAIImageGenerationRequest + img_binary = None + + if image is not None and mask is not None: + path = "/proxy/openai/images/edits" + request_class = OpenAIImageEditRequest + + input_tensor = image.squeeze().cpu() + height, width, channels = input_tensor.shape + rgba_tensor = torch.ones(height, width, 4, device="cpu") + rgba_tensor[:, :, :channels] = input_tensor + + if mask.shape[1:] != image.shape[1:-1]: + raise Exception("Mask and Image must be the same size") + rgba_tensor[:,:,3] = (1-mask.squeeze().cpu()) + + rgba_tensor = downscale_input(rgba_tensor.unsqueeze(0)).squeeze() + + image_np = (rgba_tensor.numpy() * 255).astype(np.uint8) + img = Image.fromarray(image_np) + img_byte_arr = io.BytesIO() + img.save(img_byte_arr, format='PNG') + img_byte_arr.seek(0) + img_binary = img_byte_arr#.getvalue() + img_binary.name = "image.png" + elif image is not None or mask is not None: + raise Exception("Dall-E 2 image editing requires an image AND a mask") + + # Build the operation + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=path, + method=HttpMethod.POST, + request_model=request_class, + response_model=OpenAIImageGenerationResponse + ), + request=request_class( + model=model, + prompt=prompt, + n=n, + size=size, + seed=seed, + ), + files={ + "image": img_binary, + } if img_binary else None, + auth_token=auth_token + ) + + response = operation.execute() + + img_tensor = validate_and_cast_response(response) + return (img_tensor,) + +class OpenAIDalle3(ComfyNodeABC): + """ + Generates images synchronously via OpenAI's DALL·E 3 endpoint. + + Uses the proxy at /proxy/openai/images/generations. Returned URLs are short‑lived, + so download or cache results if you need to keep them. 
+ """ + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": { + "prompt": (IO.STRING, { + "multiline": True, + "default": "", + "tooltip": "Text prompt for DALL·E", + }), + }, + "optional": { + "seed": (IO.INT, { + "default": 0, + "min": 0, + "max": 2**31-1, + "step": 1, + "display": "number", + "tooltip": "not implemented yet in backend", + }), + "quality" : (IO.COMBO, { + "options": ["standard","hd"], + "default": "standard", + "tooltip": "Image quality", + }), + "style": (IO.COMBO, { + "options": ["natural","vivid"], + "default": "natural", + "tooltip": "Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images.", + }), + "size": (IO.COMBO, { + "options": ["1024x1024", "1024x1792", "1792x1024"], + "default": "1024x1024", + "tooltip": "Image size", + }), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG" + } + } + + RETURN_TYPES = (IO.IMAGE,) + FUNCTION = "api_call" + CATEGORY = "api node" + DESCRIPTION = cleandoc(__doc__ or "") + API_NODE = True + + def api_call(self, prompt, seed=0, style="natural", quality="standard", size="1024x1024", auth_token=None): + model = "dall-e-3" + + # build the operation + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/openai/images/generations", + method=HttpMethod.POST, + request_model=OpenAIImageGenerationRequest, + response_model=OpenAIImageGenerationResponse + ), + request=OpenAIImageGenerationRequest( + model=model, + prompt=prompt, + quality=quality, + size=size, + style=style, + seed=seed, + ), + auth_token=auth_token + ) + + response = operation.execute() + + img_tensor = validate_and_cast_response(response) + return (img_tensor,) + +class OpenAIGPTImage1(ComfyNodeABC): + """ + Generates images synchronously via OpenAI's GPT Image 1 endpoint. + + Uses the proxy at /proxy/openai/images/generations. Returned URLs are short‑lived, + so download or cache results if you need to keep them. 
+ """ + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": { + "prompt": (IO.STRING, { + "multiline": True, + "default": "", + "tooltip": "Text prompt for GPT Image 1", + }), + }, + "optional": { + "seed": (IO.INT, { + "default": 0, + "min": 0, + "max": 2**31-1, + "step": 1, + "display": "number", + "tooltip": "not implemented yet in backend", + }), + "quality": (IO.COMBO, { + "options": ["low","medium","high"], + "default": "low", + "tooltip": "Image quality, affects cost and generation time.", + }), + "background": (IO.COMBO, { + "options": ["opaque","transparent"], + "default": "opaque", + "tooltip": "Return image with or without background", + }), + "size": (IO.COMBO, { + "options": ["auto", "1024x1024", "1024x1536", "1536x1024"], + "default": "auto", + "tooltip": "Image size", + }), + "n": (IO.INT, { + "default": 1, + "min": 1, + "max": 8, + "step": 1, + "display": "number", + "tooltip": "How many images to generate", + }), + "image": (IO.IMAGE, { + "default": None, + "tooltip": "Optional reference image for image editing.", + }), + "mask": (IO.MASK, { + "default": None, + "tooltip": "Optional mask for inpainting (white areas will be replaced)", + }), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG" + } + } + + RETURN_TYPES = (IO.IMAGE,) + FUNCTION = "api_call" + CATEGORY = "api node" + DESCRIPTION = cleandoc(__doc__ or "") + API_NODE = True + + def api_call(self, prompt, seed=0, quality="low", background="opaque", image=None, mask=None, n=1, size="1024x1024", auth_token=None): + model = "gpt-image-1" + path = "/proxy/openai/images/generations" + request_class = OpenAIImageGenerationRequest + img_binary = None + mask_binary = None + + + if image is not None: + path = "/proxy/openai/images/edits" + request_class = OpenAIImageEditRequest + + scaled_image = downscale_input(image).squeeze() + + image_np = (scaled_image.numpy() * 255).astype(np.uint8) + img = Image.fromarray(image_np) + img_byte_arr = io.BytesIO() + img.save(img_byte_arr, format='PNG') + img_byte_arr.seek(0) + img_binary = img_byte_arr#.getvalue() + img_binary.name = "image.png" + + if mask is not None: + if image is None: + raise Exception("Cannot use a mask without an input image") + if mask.shape[1:] != image.shape[1:-1]: + raise Exception("Mask and Image must be the same size") + batch, height, width = mask.shape + rgba_mask = torch.zeros(height, width, 4, device="cpu") + rgba_mask[:,:,3] = (1-mask.squeeze().cpu()) + + scaled_mask = downscale_input(rgba_mask.unsqueeze(0)).squeeze() + + mask_np = (scaled_mask.numpy() * 255).astype(np.uint8) + mask_img = Image.fromarray(mask_np) + mask_img_byte_arr = io.BytesIO() + mask_img.save(mask_img_byte_arr, format='PNG') + mask_img_byte_arr.seek(0) + mask_binary = mask_img_byte_arr#.getvalue() + mask_binary.name = "mask.png" + + files = {} + if img_binary: + files["image"] = img_binary + if mask_binary: + files["mask"] = mask_binary + + # Build the operation + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=path, + method=HttpMethod.POST, + request_model=request_class, + response_model=OpenAIImageGenerationResponse + ), + request=request_class( + model=model, + prompt=prompt, + quality=quality, + background=background, + n=n, + seed=seed, + size=size, + ), + files=files if files else None, + auth_token=auth_token + ) + + response = operation.execute() + + img_tensor = validate_and_cast_response(response) + return (img_tensor,) + + +# A dictionary that contains all nodes you want to export with 
their names +# NOTE: names should be globally unique +NODE_CLASS_MAPPINGS = { + "OpenAIDalle2": OpenAIDalle2, + "OpenAIDalle3": OpenAIDalle3, + "OpenAIGPTImage1": OpenAIGPTImage1, +} + +# A dictionary that contains the friendly/humanly readable titles for the nodes +NODE_DISPLAY_NAME_MAPPINGS = { + "OpenAIDalle2": "OpenAI DALL·E 2", + "OpenAIDalle3": "OpenAI DALL·E 3", + "OpenAIGPTImage1": "OpenAI GPT Image 1", +} diff --git a/nodes.py b/nodes.py index b1ab62aad..73a62d930 100644 --- a/nodes.py +++ b/nodes.py @@ -2260,11 +2260,20 @@ def init_builtin_extra_nodes(): "nodes_fresca.py", ] + api_nodes_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_api_nodes") + api_nodes_files = [ + "nodes_api.py", + ] + import_failed = [] for node_file in extras_files: if not load_custom_node(os.path.join(extras_dir, node_file), module_parent="comfy_extras"): import_failed.append(node_file) + for node_file in api_nodes_files: + if not load_custom_node(os.path.join(api_nodes_dir, node_file), module_parent="comfy_api_nodes"): + import_failed.append(node_file) + return import_failed diff --git a/requirements.txt b/requirements.txt index f8ad908ca..2ac241261 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.16.9 +comfyui-frontend-package==1.17.9 comfyui-workflow-templates==0.1.3 torch torchsde From e8ddc2be95e3c70363414dfca94f57d6dad25c8f Mon Sep 17 00:00:00 2001 From: filtered <176114999+webfiltered@users.noreply.github.com> Date: Thu, 24 Apr 2025 06:02:41 +1000 Subject: [PATCH 20/23] [BugFix] Update frontend to 1.17.10 (#7762) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2ac241261..291f81838 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.17.9 +comfyui-frontend-package==1.17.10 comfyui-workflow-templates==0.1.3 torch torchsde From 2c1d686ec61f26f3a64bb4c1afdcdb78bb943a4f Mon Sep 17 00:00:00 2001 From: thot experiment <94414189+thot-experiment@users.noreply.github.com> Date: Wed, 23 Apr 2025 13:10:10 -0700 Subject: [PATCH 21/23] implement multi image prompting for gpt-image-1 and fix transparency in outputs (#7763) * implement multi image prompting for GPTI Image 1 * fix transparency not working * fix ruff --- comfy_api_nodes/nodes_api.py | 43 ++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/comfy_api_nodes/nodes_api.py b/comfy_api_nodes/nodes_api.py index 92f4a0c87..7bca0b503 100644 --- a/comfy_api_nodes/nodes_api.py +++ b/comfy_api_nodes/nodes_api.py @@ -53,7 +53,7 @@ def validate_and_cast_response (response): raise Exception("Failed to download the image") img = Image.open(io.BytesIO(img_response.content)) - img = img.convert("RGB") # Ensure RGB format + img = img.convert("RGBA") # Convert to numpy array, normalize to float32 between 0 and 1 img_array = np.array(img).astype(np.float32) / 255.0 @@ -339,25 +339,38 @@ class OpenAIGPTImage1(ComfyNodeABC): model = "gpt-image-1" path = "/proxy/openai/images/generations" request_class = OpenAIImageGenerationRequest - img_binary = None + img_binaries = [] mask_binary = None - + files = [] if image is not None: path = "/proxy/openai/images/edits" request_class = OpenAIImageEditRequest - scaled_image = downscale_input(image).squeeze() + batch_size = image.shape[0] - image_np = (scaled_image.numpy() * 255).astype(np.uint8) - img = Image.fromarray(image_np) - img_byte_arr = io.BytesIO() - img.save(img_byte_arr, 
format='PNG') - img_byte_arr.seek(0) - img_binary = img_byte_arr#.getvalue() - img_binary.name = "image.png" + + for i in range(batch_size): + single_image = image[i:i+1] + scaled_image = downscale_input(single_image).squeeze() + + image_np = (scaled_image.numpy() * 255).astype(np.uint8) + img = Image.fromarray(image_np) + img_byte_arr = io.BytesIO() + img.save(img_byte_arr, format='PNG') + img_byte_arr.seek(0) + img_binary = img_byte_arr + img_binary.name = f"image_{i}.png" + + img_binaries.append(img_binary) + if batch_size == 1: + files.append(("image", img_binary)) + else: + files.append(("image[]", img_binary)) if mask is not None: + if image.shape[0] != 1: + raise Exception("Cannot use a mask with multiple image") if image is None: raise Exception("Cannot use a mask without an input image") if mask.shape[1:] != image.shape[1:-1]: @@ -373,14 +386,10 @@ class OpenAIGPTImage1(ComfyNodeABC): mask_img_byte_arr = io.BytesIO() mask_img.save(mask_img_byte_arr, format='PNG') mask_img_byte_arr.seek(0) - mask_binary = mask_img_byte_arr#.getvalue() + mask_binary = mask_img_byte_arr mask_binary.name = "mask.png" + files.append(("mask", mask_binary)) - files = {} - if img_binary: - files["image"] = img_binary - if mask_binary: - files["mask"] = mask_binary # Build the operation operation = SynchronousOperation( From 188b383c35f0a790e407cb337dd554fccb188f6f Mon Sep 17 00:00:00 2001 From: thot experiment <94414189+thot-experiment@users.noreply.github.com> Date: Wed, 23 Apr 2025 14:53:34 -0700 Subject: [PATCH 22/23] change timeout to 7 days (#7765) --- comfy_api_nodes/apis/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy_api_nodes/apis/client.py b/comfy_api_nodes/apis/client.py index 9bc3d76d5..384e559dc 100644 --- a/comfy_api_nodes/apis/client.py +++ b/comfy_api_nodes/apis/client.py @@ -269,7 +269,7 @@ class SynchronousOperation(Generic[T, R]): files: Optional[Dict[str, Any]] = None, api_base: str = "https://api.comfy.org", auth_token: Optional[str] = None, - timeout: float = 60.0, + timeout: float = 604800.0, verify_ssl: bool = True, ): self.endpoint = endpoint From 11b68ebd22c2137661ec6a70f39943a337edf897 Mon Sep 17 00:00:00 2001 From: filtered <176114999+webfiltered@users.noreply.github.com> Date: Thu, 24 Apr 2025 08:16:12 +1000 Subject: [PATCH 23/23] [BugFix] Update frontend to 1.17.11 (#7766) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 291f81838..10cc177af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.17.10 +comfyui-frontend-package==1.17.11 comfyui-workflow-templates==0.1.3 torch torchsde
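
The gpt-image-1 edit path above encodes every image in the input batch as an in-memory PNG and posts the results as multipart form fields, switching the field name from "image" to "image[]" when more than one image is supplied. A minimal sketch of that conversion, assuming a ComfyUI-style IMAGE tensor of shape (batch, height, width, channels) with float32 values in [0, 1]; the helper name tensor_batch_to_files is illustrative and not part of the patch:

import io

import numpy as np
import torch
from PIL import Image


def tensor_batch_to_files(image: torch.Tensor):
    # One ("field_name", BytesIO) tuple per image in the batch; requests uses the
    # .name attribute set on each buffer as the uploaded filename.
    field = "image" if image.shape[0] == 1 else "image[]"
    files = []
    for i in range(image.shape[0]):
        arr = (image[i].cpu().numpy() * 255).astype(np.uint8)  # (H, W, C) uint8
        buf = io.BytesIO()
        Image.fromarray(arr).save(buf, format="PNG")
        buf.seek(0)
        buf.name = f"image_{i}.png"
        files.append((field, buf))
    return files

Before encoding, downscale_input caps each image at roughly 1536 x 1024, about 1.57 million pixels: a 4096 x 4096 input gets scale_by = sqrt(1572864 / 16777216) ≈ 0.306 and is resized to about 1254 x 1254, while anything already at or under the budget (scale_by >= 1) passes through unchanged.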
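
Both edit paths encode the MASK input into an alpha channel (DALL·E 2 folds it into the uploaded image's own alpha, while gpt-image-1 uploads it as a separate RGBA mask file), because OpenAI's image-edit endpoints treat transparent pixels as the region to regenerate: the tooltip's "white areas will be replaced" therefore becomes alpha = 1 - mask. A minimal sketch under the same assumptions, with a MASK tensor shaped (1, height, width) where 1.0 marks pixels to repaint; mask_to_rgba_alpha is an illustrative name only:

import torch


def mask_to_rgba_alpha(mask: torch.Tensor) -> torch.Tensor:
    # mask: (1, H, W), 1.0 = repaint this pixel. The RGB channels stay at zero;
    # only alpha matters to the endpoint, and white (1.0) becomes fully transparent.
    _, height, width = mask.shape
    rgba = torch.zeros(height, width, 4)
    rgba[:, :, 3] = 1.0 - mask.squeeze(0).cpu()
    return rgba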
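
For reference, the new SynchronousOperation default of timeout=604800.0 works out to 7 × 24 × 3600 seconds, i.e. the full seven days named in the commit message, replacing the previous 60-second default.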