Compare commits

...

4 Commits

Author SHA1 Message Date
Benjamin Lu
2a60aeb0a1
Merge 29938f760f12443b70820d4c5b0c0461463ff00d into 56fa7dbe380cb5591c5542f8aa51ce2fc26beedf 2025-12-08 15:08:56 +09:00
comfyanonymous
56fa7dbe38
Properly load the newbie diffusion model. (#11172)
There is still one of the text encoders missing and I didn't actually test it.
2025-12-07 07:44:55 -05:00
Benjamin Lu
29938f760f Adjust branch order in workflow triggers 2025-12-02 17:26:04 -08:00
Benjamin Lu
b54b39fd3f Update release workflows for branch process 2025-12-02 17:16:33 -08:00
10 changed files with 66 additions and 9 deletions

View File

@@ -5,6 +5,7 @@ on:
push:
branches:
- master
- release/**
paths-ignore:
- 'app/**'
- 'input/**'

View File

@@ -2,9 +2,9 @@ name: Execution Tests
on:
push:
branches: [ main, master ]
branches: [ main, master, release/** ]
pull_request:
branches: [ main, master ]
branches: [ main, master, release/** ]
jobs:
test:

View File

@@ -2,9 +2,9 @@ name: Test server launches without errors
on:
push:
branches: [ main, master ]
branches: [ main, master, release/** ]
pull_request:
branches: [ main, master ]
branches: [ main, master, release/** ]
jobs:
test:

View File

@@ -2,9 +2,9 @@ name: Unit Tests
on:
push:
branches: [ main, master ]
branches: [ main, master, release/** ]
pull_request:
branches: [ main, master ]
branches: [ main, master, release/** ]
jobs:
test:

View File

@@ -6,6 +6,7 @@ on:
- "pyproject.toml"
branches:
- master
- release/**
jobs:
update-version:

View File

@@ -9,6 +9,12 @@ on:
type: string
default: "129"
release_tag:
description: 'Git tag to attach nightly package (e.g., v0.37.0); leave empty to upload to latest'
required: false
type: string
default: ""
python_minor:
description: 'python minor version'
required: true
@@ -34,6 +40,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.release_tag || 'master' }}
fetch-depth: 30
persist-credentials: false
- uses: actions/setup-python@v5
@@ -89,5 +96,5 @@ jobs:
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: ComfyUI_windows_portable_nvidia_or_cpu_nightly_pytorch.7z
tag: "latest"
tag: ${{ inputs.release_tag || 'latest' }}
overwrite: true

View File

@@ -9,6 +9,12 @@ on:
type: string
default: "129"
release_tag:
description: 'Git tag to package and attach (e.g., v0.37.0); leave empty to build from master and upload to latest'
required: false
type: string
default: ""
python_minor:
description: 'python minor version'
required: true
@@ -50,6 +56,7 @@ jobs:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.release_tag || 'master' }}
fetch-depth: 150
persist-credentials: false
- shell: bash
@@ -101,6 +108,5 @@ jobs:
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: new_ComfyUI_windows_portable_nvidia_cu${{ inputs.cu }}_or_cpu.7z
tag: "latest"
tag: ${{ inputs.release_tag || 'latest' }}
overwrite: true

View File

@@ -377,6 +377,7 @@ class NextDiT(nn.Module):
z_image_modulation=False,
time_scale=1.0,
pad_tokens_multiple=None,
clip_text_dim=None,
image_model=None,
device=None,
dtype=None,
@@ -447,6 +448,31 @@ class NextDiT(nn.Module):
),
)
self.clip_text_pooled_proj = None
if clip_text_dim is not None:
self.clip_text_dim = clip_text_dim
self.clip_text_pooled_proj = nn.Sequential(
operation_settings.get("operations").RMSNorm(clip_text_dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")),
operation_settings.get("operations").Linear(
clip_text_dim,
clip_text_dim,
bias=True,
device=operation_settings.get("device"),
dtype=operation_settings.get("dtype"),
),
)
self.time_text_embed = nn.Sequential(
nn.SiLU(),
operation_settings.get("operations").Linear(
min(dim, 1024) + clip_text_dim,
min(dim, 1024),
bias=True,
device=operation_settings.get("device"),
dtype=operation_settings.get("dtype"),
),
)
self.layers = nn.ModuleList(
[
JointTransformerBlock(
@@ -585,6 +611,15 @@ class NextDiT(nn.Module):
cap_feats = self.cap_embedder(cap_feats) # (N, L, D) # todo check if able to batchify w.o. redundant compute
if self.clip_text_pooled_proj is not None:
pooled = kwargs.get("clip_text_pooled", None)
if pooled is not None:
pooled = self.clip_text_pooled_proj(pooled)
else:
pooled = torch.zeros((1, self.clip_text_dim), device=x.device, dtype=x.dtype)
adaln_input = self.time_text_embed(torch.cat((t, pooled), dim=-1))
patches = transformer_options.get("patches", {})
x_is_tensor = isinstance(x, torch.Tensor)
img, mask, img_size, cap_size, freqs_cis = self.patchify_and_embed(x, cap_feats, cap_mask, t, num_tokens, transformer_options=transformer_options)

View File

@@ -1110,6 +1110,10 @@ class Lumina2(BaseModel):
if 'num_tokens' not in out:
out['num_tokens'] = comfy.conds.CONDConstant(cross_attn.shape[1])
clip_text_pooled = kwargs["pooled_output"] # Newbie
if clip_text_pooled is not None:
out['clip_text_pooled'] = comfy.conds.CONDRegular(clip_text_pooled)
return out
class WAN21(BaseModel):

View File

@@ -423,6 +423,9 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
dit_config["axes_lens"] = [300, 512, 512]
dit_config["rope_theta"] = 10000.0
dit_config["ffn_dim_multiplier"] = 4.0
ctd_weight = state_dict.get('{}clip_text_pooled_proj.0.weight'.format(key_prefix), None)
if ctd_weight is not None:
dit_config["clip_text_dim"] = ctd_weight.shape[0]
elif dit_config["dim"] == 3840: # Z image
dit_config["n_heads"] = 30
dit_config["n_kv_heads"] = 30