Compare commits

...

4 Commits

Author SHA1 Message Date
Benjamin Lu
2a60aeb0a1
Merge 29938f760f12443b70820d4c5b0c0461463ff00d into 56fa7dbe380cb5591c5542f8aa51ce2fc26beedf 2025-12-08 15:08:56 +09:00
comfyanonymous
56fa7dbe38
Properly load the newbie diffusion model. (#11172)
There is still one of the text encoders missing and I didn't actually test it.
2025-12-07 07:44:55 -05:00
Benjamin Lu
29938f760f Adjust branch order in workflow triggers 2025-12-02 17:26:04 -08:00
Benjamin Lu
b54b39fd3f Update release workflows for branch process 2025-12-02 17:16:33 -08:00
10 changed files with 66 additions and 9 deletions

View File

@@ -5,6 +5,7 @@ on:
push:
branches:
- master
- release/**
paths-ignore:
- 'app/**'
- 'input/**'

View File

@@ -2,9 +2,9 @@ name: Execution Tests
on:
push:
branches: [ main, master ]
branches: [ main, master, release/** ]
pull_request:
branches: [ main, master ]
branches: [ main, master, release/** ]
jobs:
test:

View File

@@ -2,9 +2,9 @@ name: Test server launches without errors
on:
push:
branches: [ main, master ]
branches: [ main, master, release/** ]
pull_request:
branches: [ main, master ]
branches: [ main, master, release/** ]
jobs:
test:

View File

@@ -2,9 +2,9 @@ name: Unit Tests
on:
push:
branches: [ main, master ]
branches: [ main, master, release/** ]
pull_request:
branches: [ main, master ]
branches: [ main, master, release/** ]
jobs:
test:

View File

@@ -6,6 +6,7 @@ on:
- "pyproject.toml"
branches:
- master
- release/**
jobs:
update-version:

View File

@@ -9,6 +9,12 @@ on:
type: string
default: "129"
release_tag:
description: 'Git tag to attach nightly package (e.g., v0.37.0); leave empty to upload to latest'
required: false
type: string
default: ""
python_minor:
description: 'python minor version'
required: true
@@ -34,6 +40,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.release_tag || 'master' }}
fetch-depth: 30
persist-credentials: false
- uses: actions/setup-python@v5
@@ -89,5 +96,5 @@ jobs:
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: ComfyUI_windows_portable_nvidia_or_cpu_nightly_pytorch.7z
tag: "latest"
tag: ${{ inputs.release_tag || 'latest' }}
overwrite: true

View File

@@ -9,6 +9,12 @@ on:
type: string
default: "129"
release_tag:
description: 'Git tag to package and attach (e.g., v0.37.0); leave empty to build from master and upload to latest'
required: false
type: string
default: ""
python_minor:
description: 'python minor version'
required: true
@@ -50,6 +56,7 @@ jobs:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.release_tag || 'master' }}
fetch-depth: 150
persist-credentials: false
- shell: bash
@@ -101,6 +108,5 @@ jobs:
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: new_ComfyUI_windows_portable_nvidia_cu${{ inputs.cu }}_or_cpu.7z
tag: "latest"
tag: ${{ inputs.release_tag || 'latest' }}
overwrite: true

View File

@@ -377,6 +377,7 @@ class NextDiT(nn.Module):
z_image_modulation=False,
time_scale=1.0,
pad_tokens_multiple=None,
clip_text_dim=None,
image_model=None,
device=None,
dtype=None,
@@ -447,6 +448,31 @@ class NextDiT(nn.Module):
),
)
self.clip_text_pooled_proj = None
if clip_text_dim is not None:
self.clip_text_dim = clip_text_dim
self.clip_text_pooled_proj = nn.Sequential(
operation_settings.get("operations").RMSNorm(clip_text_dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")),
operation_settings.get("operations").Linear(
clip_text_dim,
clip_text_dim,
bias=True,
device=operation_settings.get("device"),
dtype=operation_settings.get("dtype"),
),
)
self.time_text_embed = nn.Sequential(
nn.SiLU(),
operation_settings.get("operations").Linear(
min(dim, 1024) + clip_text_dim,
min(dim, 1024),
bias=True,
device=operation_settings.get("device"),
dtype=operation_settings.get("dtype"),
),
)
self.layers = nn.ModuleList(
[
JointTransformerBlock(
@@ -585,6 +611,15 @@ class NextDiT(nn.Module):
cap_feats = self.cap_embedder(cap_feats) # (N, L, D) # todo check if able to batchify w.o. redundant compute
if self.clip_text_pooled_proj is not None:
pooled = kwargs.get("clip_text_pooled", None)
if pooled is not None:
pooled = self.clip_text_pooled_proj(pooled)
else:
pooled = torch.zeros((1, self.clip_text_dim), device=x.device, dtype=x.dtype)
adaln_input = self.time_text_embed(torch.cat((t, pooled), dim=-1))
patches = transformer_options.get("patches", {})
x_is_tensor = isinstance(x, torch.Tensor)
img, mask, img_size, cap_size, freqs_cis = self.patchify_and_embed(x, cap_feats, cap_mask, t, num_tokens, transformer_options=transformer_options)

View File

@@ -1110,6 +1110,10 @@ class Lumina2(BaseModel):
if 'num_tokens' not in out:
out['num_tokens'] = comfy.conds.CONDConstant(cross_attn.shape[1])
clip_text_pooled = kwargs["pooled_output"] # Newbie
if clip_text_pooled is not None:
out['clip_text_pooled'] = comfy.conds.CONDRegular(clip_text_pooled)
return out
class WAN21(BaseModel):

View File

@@ -423,6 +423,9 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
dit_config["axes_lens"] = [300, 512, 512]
dit_config["rope_theta"] = 10000.0
dit_config["ffn_dim_multiplier"] = 4.0
ctd_weight = state_dict.get('{}clip_text_pooled_proj.0.weight'.format(key_prefix), None)
if ctd_weight is not None:
dit_config["clip_text_dim"] = ctd_weight.shape[0]
elif dit_config["dim"] == 3840: # Z image
dit_config["n_heads"] = 30
dit_config["n_kv_heads"] = 30