From 440268d3940eb14a01595439bbc05c4aacde9c72 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Wed, 3 Dec 2025 23:52:31 +0200 Subject: [PATCH 1/5] convert nodes_load_3d.py to V3 schema (#10990) --- comfy_api/latest/_ui.py | 13 +++- comfy_extras/nodes_load_3d.py | 127 ++++++++++++++++------------------ 2 files changed, 71 insertions(+), 69 deletions(-) diff --git a/comfy_api/latest/_ui.py b/comfy_api/latest/_ui.py index b0bbabe2a..6d1bea599 100644 --- a/comfy_api/latest/_ui.py +++ b/comfy_api/latest/_ui.py @@ -3,6 +3,7 @@ from __future__ import annotations import json import os import random +import uuid from io import BytesIO from typing import Type @@ -436,9 +437,19 @@ class PreviewUI3D(_UIOutput): def __init__(self, model_file, camera_info, **kwargs): self.model_file = model_file self.camera_info = camera_info + self.bg_image_path = None + bg_image = kwargs.get("bg_image", None) + if bg_image is not None: + img_array = (bg_image[0].cpu().numpy() * 255).astype(np.uint8) + img = PILImage.fromarray(img_array) + temp_dir = folder_paths.get_temp_directory() + filename = f"bg_{uuid.uuid4().hex}.png" + bg_image_path = os.path.join(temp_dir, filename) + img.save(bg_image_path, compress_level=1) + self.bg_image_path = f"temp/{filename}" def as_dict(self): - return {"result": [self.model_file, self.camera_info]} + return {"result": [self.model_file, self.camera_info, self.bg_image_path]} class PreviewText(_UIOutput): diff --git a/comfy_extras/nodes_load_3d.py b/comfy_extras/nodes_load_3d.py index 54c66ef68..545588ef8 100644 --- a/comfy_extras/nodes_load_3d.py +++ b/comfy_extras/nodes_load_3d.py @@ -2,22 +2,18 @@ import nodes import folder_paths import os -from comfy.comfy_types import IO -from comfy_api.input_impl import VideoFromFile +from typing_extensions import override +from comfy_api.latest import IO, ComfyExtension, InputImpl, UI from pathlib import Path -from PIL import Image -import numpy as np - -import uuid def normalize_path(path): return path.replace('\\', '/') -class Load3D(): +class Load3D(IO.ComfyNode): @classmethod - def INPUT_TYPES(s): + def define_schema(cls): input_dir = os.path.join(folder_paths.get_input_directory(), "3d") os.makedirs(input_dir, exist_ok=True) @@ -30,23 +26,29 @@ class Load3D(): for file_path in input_path.rglob("*") if file_path.suffix.lower() in {'.gltf', '.glb', '.obj', '.fbx', '.stl'} ] + return IO.Schema( + node_id="Load3D", + display_name="Load 3D & Animation", + category="3d", + is_experimental=True, + inputs=[ + IO.Combo.Input("model_file", options=sorted(files), upload=IO.UploadType.model), + IO.Load3D.Input("image"), + IO.Int.Input("width", default=1024, min=1, max=4096, step=1), + IO.Int.Input("height", default=1024, min=1, max=4096, step=1), + ], + outputs=[ + IO.Image.Output(display_name="image"), + IO.Mask.Output(display_name="mask"), + IO.String.Output(display_name="mesh_path"), + IO.Image.Output(display_name="normal"), + IO.Load3DCamera.Output(display_name="camera_info"), + IO.Video.Output(display_name="recording_video"), + ], + ) - return {"required": { - "model_file": (sorted(files), {"file_upload": True}), - "image": ("LOAD_3D", {}), - "width": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}), - "height": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}), - }} - - RETURN_TYPES = ("IMAGE", "MASK", "STRING", "IMAGE", "LOAD3D_CAMERA", IO.VIDEO) - RETURN_NAMES = ("image", "mask", "mesh_path", "normal", "camera_info", "recording_video") - - FUNCTION = "process" - 
EXPERIMENTAL = True - - CATEGORY = "3d" - - def process(self, model_file, image, **kwargs): + @classmethod + def execute(cls, model_file, image, **kwargs) -> IO.NodeOutput: image_path = folder_paths.get_annotated_filepath(image['image']) mask_path = folder_paths.get_annotated_filepath(image['mask']) normal_path = folder_paths.get_annotated_filepath(image['normal']) @@ -61,58 +63,47 @@ class Load3D(): if image['recording'] != "": recording_video_path = folder_paths.get_annotated_filepath(image['recording']) - video = VideoFromFile(recording_video_path) + video = InputImpl.VideoFromFile(recording_video_path) - return output_image, output_mask, model_file, normal_image, image['camera_info'], video + return IO.NodeOutput(output_image, output_mask, model_file, normal_image, image['camera_info'], video) -class Preview3D(): + process = execute # TODO: remove + + +class Preview3D(IO.ComfyNode): @classmethod - def INPUT_TYPES(s): - return {"required": { - "model_file": ("STRING", {"default": "", "multiline": False}), - }, - "optional": { - "camera_info": ("LOAD3D_CAMERA", {}), - "bg_image": ("IMAGE", {}) - }} + def define_schema(cls): + return IO.Schema( + node_id="Preview3D", + display_name="Preview 3D & Animation", + category="3d", + is_experimental=True, + is_output_node=True, + inputs=[ + IO.String.Input("model_file", default="", multiline=False), + IO.Load3DCamera.Input("camera_info", optional=True), + IO.Image.Input("bg_image", optional=True), + ], + outputs=[], + ) - OUTPUT_NODE = True - RETURN_TYPES = () - - CATEGORY = "3d" - - FUNCTION = "process" - EXPERIMENTAL = True - - def process(self, model_file, **kwargs): + @classmethod + def execute(cls, model_file, **kwargs) -> IO.NodeOutput: camera_info = kwargs.get("camera_info", None) bg_image = kwargs.get("bg_image", None) + return IO.NodeOutput(ui=UI.PreviewUI3D(model_file, camera_info, bg_image=bg_image)) - bg_image_path = None - if bg_image is not None: + process = execute # TODO: remove - img_array = (bg_image[0].cpu().numpy() * 255).astype(np.uint8) - img = Image.fromarray(img_array) - temp_dir = folder_paths.get_temp_directory() - filename = f"bg_{uuid.uuid4().hex}.png" - bg_image_path = os.path.join(temp_dir, filename) - img.save(bg_image_path, compress_level=1) +class Load3DExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[IO.ComfyNode]]: + return [ + Load3D, + Preview3D, + ] - bg_image_path = f"temp/{filename}" - return { - "ui": { - "result": [model_file, camera_info, bg_image_path] - } - } - -NODE_CLASS_MAPPINGS = { - "Load3D": Load3D, - "Preview3D": Preview3D, -} - -NODE_DISPLAY_NAME_MAPPINGS = { - "Load3D": "Load 3D & Animation", - "Preview3D": "Preview 3D & Animation", -} +async def comfy_entrypoint() -> Load3DExtension: + return Load3DExtension() From dce518c2b4f99634b5fdde1924d9b0bd468fe1ce Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 4 Dec 2025 03:35:04 +0200 Subject: [PATCH 2/5] convert nodes_audio.py to V3 schema (#10798) --- comfy_api/latest/_ui.py | 9 +- comfy_extras/nodes_audio.py | 744 ++++++++++++++++++------------------ 2 files changed, 382 insertions(+), 371 deletions(-) diff --git a/comfy_api/latest/_ui.py b/comfy_api/latest/_ui.py index 6d1bea599..5a75a3aae 100644 --- a/comfy_api/latest/_ui.py +++ b/comfy_api/latest/_ui.py @@ -319,9 +319,10 @@ class AudioSaveHelper: for key, value in metadata.items(): output_container.metadata[key] = value + layout = "mono" if waveform.shape[0] == 1 else "stereo" # Set up the output stream 
with appropriate properties if format == "opus": - out_stream = output_container.add_stream("libopus", rate=sample_rate) + out_stream = output_container.add_stream("libopus", rate=sample_rate, layout=layout) if quality == "64k": out_stream.bit_rate = 64000 elif quality == "96k": @@ -333,7 +334,7 @@ class AudioSaveHelper: elif quality == "320k": out_stream.bit_rate = 320000 elif format == "mp3": - out_stream = output_container.add_stream("libmp3lame", rate=sample_rate) + out_stream = output_container.add_stream("libmp3lame", rate=sample_rate, layout=layout) if quality == "V0": # TODO i would really love to support V3 and V5 but there doesn't seem to be a way to set the qscale level, the property below is a bool out_stream.codec_context.qscale = 1 @@ -342,12 +343,12 @@ class AudioSaveHelper: elif quality == "320k": out_stream.bit_rate = 320000 else: # format == "flac": - out_stream = output_container.add_stream("flac", rate=sample_rate) + out_stream = output_container.add_stream("flac", rate=sample_rate, layout=layout) frame = av.AudioFrame.from_ndarray( waveform.movedim(0, 1).reshape(1, -1).float().numpy(), format="flt", - layout="mono" if waveform.shape[0] == 1 else "stereo", + layout=layout, ) frame.sample_rate = sample_rate frame.pts = 0 diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py index 2ed7e0b22..812301fb7 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -6,65 +6,80 @@ import torch import comfy.model_management import folder_paths import os -import io -import json -import random import hashlib import node_helpers import logging -from comfy.cli_args import args -from comfy.comfy_types import FileLocator +from typing_extensions import override +from comfy_api.latest import ComfyExtension, IO, UI -class EmptyLatentAudio: - def __init__(self): - self.device = comfy.model_management.intermediate_device() +class EmptyLatentAudio(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="EmptyLatentAudio", + display_name="Empty Latent Audio", + category="latent/audio", + inputs=[ + IO.Float.Input("seconds", default=47.6, min=1.0, max=1000.0, step=0.1), + IO.Int.Input( + "batch_size", default=1, min=1, max=4096, tooltip="The number of latent images in the batch." 
+ ), + ], + outputs=[IO.Latent.Output()], + ) @classmethod - def INPUT_TYPES(s): - return {"required": {"seconds": ("FLOAT", {"default": 47.6, "min": 1.0, "max": 1000.0, "step": 0.1}), - "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096, "tooltip": "The number of latent images in the batch."}), - }} - RETURN_TYPES = ("LATENT",) - FUNCTION = "generate" - - CATEGORY = "latent/audio" - - def generate(self, seconds, batch_size): + def execute(cls, seconds, batch_size) -> IO.NodeOutput: length = round((seconds * 44100 / 2048) / 2) * 2 - latent = torch.zeros([batch_size, 64, length], device=self.device) - return ({"samples":latent, "type": "audio"}, ) + latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device()) + return IO.NodeOutput({"samples":latent, "type": "audio"}) -class ConditioningStableAudio: + generate = execute # TODO: remove + + +class ConditioningStableAudio(IO.ComfyNode): @classmethod - def INPUT_TYPES(s): - return {"required": {"positive": ("CONDITIONING", ), - "negative": ("CONDITIONING", ), - "seconds_start": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1000.0, "step": 0.1}), - "seconds_total": ("FLOAT", {"default": 47.0, "min": 0.0, "max": 1000.0, "step": 0.1}), - }} + def define_schema(cls): + return IO.Schema( + node_id="ConditioningStableAudio", + category="conditioning", + inputs=[ + IO.Conditioning.Input("positive"), + IO.Conditioning.Input("negative"), + IO.Float.Input("seconds_start", default=0.0, min=0.0, max=1000.0, step=0.1), + IO.Float.Input("seconds_total", default=47.0, min=0.0, max=1000.0, step=0.1), + ], + outputs=[ + IO.Conditioning.Output(display_name="positive"), + IO.Conditioning.Output(display_name="negative"), + ], + ) - RETURN_TYPES = ("CONDITIONING","CONDITIONING") - RETURN_NAMES = ("positive", "negative") - - FUNCTION = "append" - - CATEGORY = "conditioning" - - def append(self, positive, negative, seconds_start, seconds_total): + @classmethod + def execute(cls, positive, negative, seconds_start, seconds_total) -> IO.NodeOutput: positive = node_helpers.conditioning_set_values(positive, {"seconds_start": seconds_start, "seconds_total": seconds_total}) negative = node_helpers.conditioning_set_values(negative, {"seconds_start": seconds_start, "seconds_total": seconds_total}) - return (positive, negative) + return IO.NodeOutput(positive, negative) -class VAEEncodeAudio: + append = execute # TODO: remove + + +class VAEEncodeAudio(IO.ComfyNode): @classmethod - def INPUT_TYPES(s): - return {"required": { "audio": ("AUDIO", ), "vae": ("VAE", )}} - RETURN_TYPES = ("LATENT",) - FUNCTION = "encode" + def define_schema(cls): + return IO.Schema( + node_id="VAEEncodeAudio", + display_name="VAE Encode Audio", + category="latent/audio", + inputs=[ + IO.Audio.Input("audio"), + IO.Vae.Input("vae"), + ], + outputs=[IO.Latent.Output()], + ) - CATEGORY = "latent/audio" - - def encode(self, vae, audio): + @classmethod + def execute(cls, vae, audio) -> IO.NodeOutput: sample_rate = audio["sample_rate"] if 44100 != sample_rate: waveform = torchaudio.functional.resample(audio["waveform"], sample_rate, 44100) @@ -72,213 +87,134 @@ class VAEEncodeAudio: waveform = audio["waveform"] t = vae.encode(waveform.movedim(1, -1)) - return ({"samples":t}, ) + return IO.NodeOutput({"samples":t}) -class VAEDecodeAudio: + encode = execute # TODO: remove + + +class VAEDecodeAudio(IO.ComfyNode): @classmethod - def INPUT_TYPES(s): - return {"required": { "samples": ("LATENT", ), "vae": ("VAE", )}} - RETURN_TYPES = ("AUDIO",) - FUNCTION = "decode" 
+ def define_schema(cls): + return IO.Schema( + node_id="VAEDecodeAudio", + display_name="VAE Decode Audio", + category="latent/audio", + inputs=[ + IO.Latent.Input("samples"), + IO.Vae.Input("vae"), + ], + outputs=[IO.Audio.Output()], + ) - CATEGORY = "latent/audio" - - def decode(self, vae, samples): + @classmethod + def execute(cls, vae, samples) -> IO.NodeOutput: audio = vae.decode(samples["samples"]).movedim(-1, 1) std = torch.std(audio, dim=[1,2], keepdim=True) * 5.0 std[std < 1.0] = 1.0 audio /= std - return ({"waveform": audio, "sample_rate": 44100}, ) + return IO.NodeOutput({"waveform": audio, "sample_rate": 44100}) + + decode = execute # TODO: remove -def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=None, extra_pnginfo=None, quality="128k"): - - filename_prefix += self.prefix_append - full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir) - results: list[FileLocator] = [] - - # Prepare metadata dictionary - metadata = {} - if not args.disable_metadata: - if prompt is not None: - metadata["prompt"] = json.dumps(prompt) - if extra_pnginfo is not None: - for x in extra_pnginfo: - metadata[x] = json.dumps(extra_pnginfo[x]) - - # Opus supported sample rates - OPUS_RATES = [8000, 12000, 16000, 24000, 48000] - - for (batch_number, waveform) in enumerate(audio["waveform"].cpu()): - filename_with_batch_num = filename.replace("%batch_num%", str(batch_number)) - file = f"{filename_with_batch_num}_{counter:05}_.{format}" - output_path = os.path.join(full_output_folder, file) - - # Use original sample rate initially - sample_rate = audio["sample_rate"] - - # Handle Opus sample rate requirements - if format == "opus": - if sample_rate > 48000: - sample_rate = 48000 - elif sample_rate not in OPUS_RATES: - # Find the next highest supported rate - for rate in sorted(OPUS_RATES): - if rate > sample_rate: - sample_rate = rate - break - if sample_rate not in OPUS_RATES: # Fallback if still not supported - sample_rate = 48000 - - # Resample if necessary - if sample_rate != audio["sample_rate"]: - waveform = torchaudio.functional.resample(waveform, audio["sample_rate"], sample_rate) - - # Create output with specified format - output_buffer = io.BytesIO() - output_container = av.open(output_buffer, mode='w', format=format) - - # Set metadata on the container - for key, value in metadata.items(): - output_container.metadata[key] = value - - layout = 'mono' if waveform.shape[0] == 1 else 'stereo' - # Set up the output stream with appropriate properties - if format == "opus": - out_stream = output_container.add_stream("libopus", rate=sample_rate, layout=layout) - if quality == "64k": - out_stream.bit_rate = 64000 - elif quality == "96k": - out_stream.bit_rate = 96000 - elif quality == "128k": - out_stream.bit_rate = 128000 - elif quality == "192k": - out_stream.bit_rate = 192000 - elif quality == "320k": - out_stream.bit_rate = 320000 - elif format == "mp3": - out_stream = output_container.add_stream("libmp3lame", rate=sample_rate, layout=layout) - if quality == "V0": - #TODO i would really love to support V3 and V5 but there doesn't seem to be a way to set the qscale level, the property below is a bool - out_stream.codec_context.qscale = 1 - elif quality == "128k": - out_stream.bit_rate = 128000 - elif quality == "320k": - out_stream.bit_rate = 320000 - else: #format == "flac": - out_stream = output_container.add_stream("flac", rate=sample_rate, layout=layout) - - frame = 
av.AudioFrame.from_ndarray(waveform.movedim(0, 1).reshape(1, -1).float().numpy(), format='flt', layout=layout) - frame.sample_rate = sample_rate - frame.pts = 0 - output_container.mux(out_stream.encode(frame)) - - # Flush encoder - output_container.mux(out_stream.encode(None)) - - # Close containers - output_container.close() - - # Write the output to file - output_buffer.seek(0) - with open(output_path, 'wb') as f: - f.write(output_buffer.getbuffer()) - - results.append({ - "filename": file, - "subfolder": subfolder, - "type": self.type - }) - counter += 1 - - return { "ui": { "audio": results } } - -class SaveAudio: - def __init__(self): - self.output_dir = folder_paths.get_output_directory() - self.type = "output" - self.prefix_append = "" +class SaveAudio(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="SaveAudio", + display_name="Save Audio (FLAC)", + category="audio", + inputs=[ + IO.Audio.Input("audio"), + IO.String.Input("filename_prefix", default="audio/ComfyUI"), + ], + hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo], + is_output_node=True, + ) @classmethod - def INPUT_TYPES(s): - return {"required": { "audio": ("AUDIO", ), - "filename_prefix": ("STRING", {"default": "audio/ComfyUI"}), - }, - "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, - } + def execute(cls, audio, filename_prefix="ComfyUI", format="flac") -> IO.NodeOutput: + return IO.NodeOutput( + ui=UI.AudioSaveHelper.get_save_audio_ui(audio, filename_prefix=filename_prefix, cls=cls, format=format) + ) - RETURN_TYPES = () - FUNCTION = "save_flac" + save_flac = execute # TODO: remove - OUTPUT_NODE = True - CATEGORY = "audio" - - def save_flac(self, audio, filename_prefix="ComfyUI", format="flac", prompt=None, extra_pnginfo=None): - return save_audio(self, audio, filename_prefix, format, prompt, extra_pnginfo) - -class SaveAudioMP3: - def __init__(self): - self.output_dir = folder_paths.get_output_directory() - self.type = "output" - self.prefix_append = "" +class SaveAudioMP3(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="SaveAudioMP3", + display_name="Save Audio (MP3)", + category="audio", + inputs=[ + IO.Audio.Input("audio"), + IO.String.Input("filename_prefix", default="audio/ComfyUI"), + IO.Combo.Input("quality", options=["V0", "128k", "320k"], default="V0"), + ], + hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo], + is_output_node=True, + ) @classmethod - def INPUT_TYPES(s): - return {"required": { "audio": ("AUDIO", ), - "filename_prefix": ("STRING", {"default": "audio/ComfyUI"}), - "quality": (["V0", "128k", "320k"], {"default": "V0"}), - }, - "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, - } + def execute(cls, audio, filename_prefix="ComfyUI", format="mp3", quality="128k") -> IO.NodeOutput: + return IO.NodeOutput( + ui=UI.AudioSaveHelper.get_save_audio_ui( + audio, filename_prefix=filename_prefix, cls=cls, format=format, quality=quality + ) + ) - RETURN_TYPES = () - FUNCTION = "save_mp3" + save_mp3 = execute # TODO: remove - OUTPUT_NODE = True - CATEGORY = "audio" - - def save_mp3(self, audio, filename_prefix="ComfyUI", format="mp3", prompt=None, extra_pnginfo=None, quality="128k"): - return save_audio(self, audio, filename_prefix, format, prompt, extra_pnginfo, quality) - -class SaveAudioOpus: - def __init__(self): - self.output_dir = folder_paths.get_output_directory() - self.type = "output" - self.prefix_append = "" +class SaveAudioOpus(IO.ComfyNode): + @classmethod + def define_schema(cls): + 
return IO.Schema( + node_id="SaveAudioOpus", + display_name="Save Audio (Opus)", + category="audio", + inputs=[ + IO.Audio.Input("audio"), + IO.String.Input("filename_prefix", default="audio/ComfyUI"), + IO.Combo.Input("quality", options=["64k", "96k", "128k", "192k", "320k"], default="128k"), + ], + hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo], + is_output_node=True, + ) @classmethod - def INPUT_TYPES(s): - return {"required": { "audio": ("AUDIO", ), - "filename_prefix": ("STRING", {"default": "audio/ComfyUI"}), - "quality": (["64k", "96k", "128k", "192k", "320k"], {"default": "128k"}), - }, - "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, - } + def execute(cls, audio, filename_prefix="ComfyUI", format="opus", quality="V3") -> IO.NodeOutput: + return IO.NodeOutput( + ui=UI.AudioSaveHelper.get_save_audio_ui( + audio, filename_prefix=filename_prefix, cls=cls, format=format, quality=quality + ) + ) - RETURN_TYPES = () - FUNCTION = "save_opus" + save_opus = execute # TODO: remove - OUTPUT_NODE = True - CATEGORY = "audio" - - def save_opus(self, audio, filename_prefix="ComfyUI", format="opus", prompt=None, extra_pnginfo=None, quality="V3"): - return save_audio(self, audio, filename_prefix, format, prompt, extra_pnginfo, quality) - -class PreviewAudio(SaveAudio): - def __init__(self): - self.output_dir = folder_paths.get_temp_directory() - self.type = "temp" - self.prefix_append = "_temp_" + ''.join(random.choice("abcdefghijklmnopqrstupvxyz") for x in range(5)) +class PreviewAudio(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="PreviewAudio", + display_name="Preview Audio", + category="audio", + inputs=[ + IO.Audio.Input("audio"), + ], + hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo], + is_output_node=True, + ) @classmethod - def INPUT_TYPES(s): - return {"required": - {"audio": ("AUDIO", ), }, - "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, - } + def execute(cls, audio) -> IO.NodeOutput: + return IO.NodeOutput(ui=UI.PreviewAudio(audio, cls=cls)) + + save_flac = execute # TODO: remove + def f32_pcm(wav: torch.Tensor) -> torch.Tensor: """Convert audio to float 32 bits PCM format.""" @@ -316,26 +252,30 @@ def load(filepath: str) -> tuple[torch.Tensor, int]: wav = f32_pcm(wav) return wav, sr -class LoadAudio: +class LoadAudio(IO.ComfyNode): @classmethod - def INPUT_TYPES(s): + def define_schema(cls): input_dir = folder_paths.get_input_directory() files = folder_paths.filter_files_content_types(os.listdir(input_dir), ["audio", "video"]) - return {"required": {"audio": (sorted(files), {"audio_upload": True})}} + return IO.Schema( + node_id="LoadAudio", + display_name="Load Audio", + category="audio", + inputs=[ + IO.Combo.Input("audio", upload=IO.UploadType.audio, options=sorted(files)), + ], + outputs=[IO.Audio.Output()], + ) - CATEGORY = "audio" - - RETURN_TYPES = ("AUDIO", ) - FUNCTION = "load" - - def load(self, audio): + @classmethod + def execute(cls, audio) -> IO.NodeOutput: audio_path = folder_paths.get_annotated_filepath(audio) waveform, sample_rate = load(audio_path) audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate} - return (audio, ) + return IO.NodeOutput(audio) @classmethod - def IS_CHANGED(s, audio): + def fingerprint_inputs(cls, audio): image_path = folder_paths.get_annotated_filepath(audio) m = hashlib.sha256() with open(image_path, 'rb') as f: @@ -343,46 +283,69 @@ class LoadAudio: return m.digest().hex() @classmethod - def VALIDATE_INPUTS(s, audio): + def 
validate_inputs(cls, audio): if not folder_paths.exists_annotated_filepath(audio): return "Invalid audio file: {}".format(audio) return True -class RecordAudio: + load = execute # TODO: remove + + +class RecordAudio(IO.ComfyNode): @classmethod - def INPUT_TYPES(s): - return {"required": {"audio": ("AUDIO_RECORD", {})}} + def define_schema(cls): + return IO.Schema( + node_id="RecordAudio", + display_name="Record Audio", + category="audio", + inputs=[ + IO.Custom("AUDIO_RECORD").Input("audio"), + ], + outputs=[IO.Audio.Output()], + ) - CATEGORY = "audio" - - RETURN_TYPES = ("AUDIO", ) - FUNCTION = "load" - - def load(self, audio): + @classmethod + def execute(cls, audio) -> IO.NodeOutput: audio_path = folder_paths.get_annotated_filepath(audio) waveform, sample_rate = load(audio_path) audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate} - return (audio, ) + return IO.NodeOutput(audio) + + load = execute # TODO: remove -class TrimAudioDuration: +class TrimAudioDuration(IO.ComfyNode): @classmethod - def INPUT_TYPES(cls): - return { - "required": { - "audio": ("AUDIO",), - "start_index": ("FLOAT", {"default": 0.0, "min": -0xffffffffffffffff, "max": 0xffffffffffffffff, "step": 0.01, "tooltip": "Start time in seconds, can be negative to count from the end (supports sub-seconds)."}), - "duration": ("FLOAT", {"default": 60.0, "min": 0.0, "step": 0.01, "tooltip": "Duration in seconds"}), - }, - } + def define_schema(cls): + return IO.Schema( + node_id="TrimAudioDuration", + display_name="Trim Audio Duration", + description="Trim audio tensor into chosen time range.", + category="audio", + inputs=[ + IO.Audio.Input("audio"), + IO.Float.Input( + "start_index", + default=0.0, + min=-0xffffffffffffffff, + max=0xffffffffffffffff, + step=0.01, + tooltip="Start time in seconds, can be negative to count from the end (supports sub-seconds).", + ), + IO.Float.Input( + "duration", + default=60.0, + min=0.0, + step=0.01, + tooltip="Duration in seconds", + ), + ], + outputs=[IO.Audio.Output()], + ) - FUNCTION = "trim" - RETURN_TYPES = ("AUDIO",) - CATEGORY = "audio" - DESCRIPTION = "Trim audio tensor into chosen time range." - - def trim(self, audio, start_index, duration): + @classmethod + def execute(cls, audio, start_index, duration) -> IO.NodeOutput: waveform = audio["waveform"] sample_rate = audio["sample_rate"] audio_length = waveform.shape[-1] @@ -399,23 +362,30 @@ class TrimAudioDuration: if start_frame >= end_frame: raise ValueError("AudioTrim: Start time must be less than end time and be within the audio length.") - return ({"waveform": waveform[..., start_frame:end_frame], "sample_rate": sample_rate},) + return IO.NodeOutput({"waveform": waveform[..., start_frame:end_frame], "sample_rate": sample_rate}) + + trim = execute # TODO: remove -class SplitAudioChannels: +class SplitAudioChannels(IO.ComfyNode): @classmethod - def INPUT_TYPES(s): - return {"required": { - "audio": ("AUDIO",), - }} + def define_schema(cls): + return IO.Schema( + node_id="SplitAudioChannels", + display_name="Split Audio Channels", + description="Separates the audio into left and right channels.", + category="audio", + inputs=[ + IO.Audio.Input("audio"), + ], + outputs=[ + IO.Audio.Output(display_name="left"), + IO.Audio.Output(display_name="right"), + ], + ) - RETURN_TYPES = ("AUDIO", "AUDIO") - RETURN_NAMES = ("left", "right") - FUNCTION = "separate" - CATEGORY = "audio" - DESCRIPTION = "Separates the audio into left and right channels." 
- - def separate(self, audio): + @classmethod + def execute(cls, audio) -> IO.NodeOutput: waveform = audio["waveform"] sample_rate = audio["sample_rate"] @@ -425,7 +395,9 @@ class SplitAudioChannels: left_channel = waveform[..., 0:1, :] right_channel = waveform[..., 1:2, :] - return ({"waveform": left_channel, "sample_rate": sample_rate}, {"waveform": right_channel, "sample_rate": sample_rate}) + return IO.NodeOutput({"waveform": left_channel, "sample_rate": sample_rate}, {"waveform": right_channel, "sample_rate": sample_rate}) + + separate = execute # TODO: remove def match_audio_sample_rates(waveform_1, sample_rate_1, waveform_2, sample_rate_2): @@ -443,21 +415,29 @@ def match_audio_sample_rates(waveform_1, sample_rate_1, waveform_2, sample_rate_ return waveform_1, waveform_2, output_sample_rate -class AudioConcat: +class AudioConcat(IO.ComfyNode): @classmethod - def INPUT_TYPES(s): - return {"required": { - "audio1": ("AUDIO",), - "audio2": ("AUDIO",), - "direction": (['after', 'before'], {"default": 'after', "tooltip": "Whether to append audio2 after or before audio1."}), - }} + def define_schema(cls): + return IO.Schema( + node_id="AudioConcat", + display_name="Audio Concat", + description="Concatenates the audio1 to audio2 in the specified direction.", + category="audio", + inputs=[ + IO.Audio.Input("audio1"), + IO.Audio.Input("audio2"), + IO.Combo.Input( + "direction", + options=['after', 'before'], + default="after", + tooltip="Whether to append audio2 after or before audio1.", + ) + ], + outputs=[IO.Audio.Output()], + ) - RETURN_TYPES = ("AUDIO",) - FUNCTION = "concat" - CATEGORY = "audio" - DESCRIPTION = "Concatenates the audio1 to audio2 in the specified direction." - - def concat(self, audio1, audio2, direction): + @classmethod + def execute(cls, audio1, audio2, direction) -> IO.NodeOutput: waveform_1 = audio1["waveform"] waveform_2 = audio2["waveform"] sample_rate_1 = audio1["sample_rate"] @@ -477,26 +457,33 @@ class AudioConcat: elif direction == 'before': concatenated_audio = torch.cat((waveform_2, waveform_1), dim=2) - return ({"waveform": concatenated_audio, "sample_rate": output_sample_rate},) + return IO.NodeOutput({"waveform": concatenated_audio, "sample_rate": output_sample_rate}) + + concat = execute # TODO: remove -class AudioMerge: +class AudioMerge(IO.ComfyNode): @classmethod - def INPUT_TYPES(cls): - return { - "required": { - "audio1": ("AUDIO",), - "audio2": ("AUDIO",), - "merge_method": (["add", "mean", "subtract", "multiply"], {"tooltip": "The method used to combine the audio waveforms."}), - }, - } + def define_schema(cls): + return IO.Schema( + node_id="AudioMerge", + display_name="Audio Merge", + description="Combine two audio tracks by overlaying their waveforms.", + category="audio", + inputs=[ + IO.Audio.Input("audio1"), + IO.Audio.Input("audio2"), + IO.Combo.Input( + "merge_method", + options=["add", "mean", "subtract", "multiply"], + tooltip="The method used to combine the audio waveforms.", + ) + ], + outputs=[IO.Audio.Output()], + ) - FUNCTION = "merge" - RETURN_TYPES = ("AUDIO",) - CATEGORY = "audio" - DESCRIPTION = "Combine two audio tracks by overlaying their waveforms." 
- - def merge(self, audio1, audio2, merge_method): + @classmethod + def execute(cls, audio1, audio2, merge_method) -> IO.NodeOutput: waveform_1 = audio1["waveform"] waveform_2 = audio2["waveform"] sample_rate_1 = audio1["sample_rate"] @@ -530,85 +517,108 @@ class AudioMerge: if max_val > 1.0: waveform = waveform / max_val - return ({"waveform": waveform, "sample_rate": output_sample_rate},) + return IO.NodeOutput({"waveform": waveform, "sample_rate": output_sample_rate}) + + merge = execute # TODO: remove -class AudioAdjustVolume: +class AudioAdjustVolume(IO.ComfyNode): @classmethod - def INPUT_TYPES(s): - return {"required": { - "audio": ("AUDIO",), - "volume": ("INT", {"default": 1.0, "min": -100, "max": 100, "tooltip": "Volume adjustment in decibels (dB). 0 = no change, +6 = double, -6 = half, etc"}), - }} + def define_schema(cls): + return IO.Schema( + node_id="AudioAdjustVolume", + display_name="Audio Adjust Volume", + category="audio", + inputs=[ + IO.Audio.Input("audio"), + IO.Int.Input( + "volume", + default=1, + min=-100, + max=100, + tooltip="Volume adjustment in decibels (dB). 0 = no change, +6 = double, -6 = half, etc", + ) + ], + outputs=[IO.Audio.Output()], + ) - RETURN_TYPES = ("AUDIO",) - FUNCTION = "adjust_volume" - CATEGORY = "audio" - - def adjust_volume(self, audio, volume): + @classmethod + def execute(cls, audio, volume) -> IO.NodeOutput: if volume == 0: - return (audio,) + return IO.NodeOutput(audio) waveform = audio["waveform"] sample_rate = audio["sample_rate"] gain = 10 ** (volume / 20) waveform = waveform * gain - return ({"waveform": waveform, "sample_rate": sample_rate},) + return IO.NodeOutput({"waveform": waveform, "sample_rate": sample_rate}) + + adjust_volume = execute # TODO: remove -class EmptyAudio: +class EmptyAudio(IO.ComfyNode): @classmethod - def INPUT_TYPES(s): - return {"required": { - "duration": ("FLOAT", {"default": 60.0, "min": 0.0, "max": 0xffffffffffffffff, "step": 0.01, "tooltip": "Duration of the empty audio clip in seconds"}), - "sample_rate": ("INT", {"default": 44100, "tooltip": "Sample rate of the empty audio clip."}), - "channels": ("INT", {"default": 2, "min": 1, "max": 2, "tooltip": "Number of audio channels (1 for mono, 2 for stereo)."}), - }} + def define_schema(cls): + return IO.Schema( + node_id="EmptyAudio", + display_name="Empty Audio", + category="audio", + inputs=[ + IO.Float.Input( + "duration", + default=60.0, + min=0.0, + max=0xffffffffffffffff, + step=0.01, + tooltip="Duration of the empty audio clip in seconds", + ), + IO.Int.Input( + "sample_rate", + default=44100, + tooltip="Sample rate of the empty audio clip.", + ), + IO.Int.Input( + "channels", + default=2, + min=1, + max=2, + tooltip="Number of audio channels (1 for mono, 2 for stereo).", + ), + ], + outputs=[IO.Audio.Output()], + ) - RETURN_TYPES = ("AUDIO",) - FUNCTION = "create_empty_audio" - CATEGORY = "audio" - - def create_empty_audio(self, duration, sample_rate, channels): + @classmethod + def execute(cls, duration, sample_rate, channels) -> IO.NodeOutput: num_samples = int(round(duration * sample_rate)) waveform = torch.zeros((1, channels, num_samples), dtype=torch.float32) - return ({"waveform": waveform, "sample_rate": sample_rate},) + return IO.NodeOutput({"waveform": waveform, "sample_rate": sample_rate}) + + create_empty_audio = execute # TODO: remove -NODE_CLASS_MAPPINGS = { - "EmptyLatentAudio": EmptyLatentAudio, - "VAEEncodeAudio": VAEEncodeAudio, - "VAEDecodeAudio": VAEDecodeAudio, - "SaveAudio": SaveAudio, - "SaveAudioMP3": SaveAudioMP3, -
"SaveAudioOpus": SaveAudioOpus, - "LoadAudio": LoadAudio, - "PreviewAudio": PreviewAudio, - "ConditioningStableAudio": ConditioningStableAudio, - "RecordAudio": RecordAudio, - "TrimAudioDuration": TrimAudioDuration, - "SplitAudioChannels": SplitAudioChannels, - "AudioConcat": AudioConcat, - "AudioMerge": AudioMerge, - "AudioAdjustVolume": AudioAdjustVolume, - "EmptyAudio": EmptyAudio, -} +class AudioExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[IO.ComfyNode]]: + return [ + EmptyLatentAudio, + VAEEncodeAudio, + VAEDecodeAudio, + SaveAudio, + SaveAudioMP3, + SaveAudioOpus, + LoadAudio, + PreviewAudio, + ConditioningStableAudio, + RecordAudio, + TrimAudioDuration, + SplitAudioChannels, + AudioConcat, + AudioMerge, + AudioAdjustVolume, + EmptyAudio, + ] -NODE_DISPLAY_NAME_MAPPINGS = { - "EmptyLatentAudio": "Empty Latent Audio", - "VAEEncodeAudio": "VAE Encode Audio", - "VAEDecodeAudio": "VAE Decode Audio", - "PreviewAudio": "Preview Audio", - "LoadAudio": "Load Audio", - "SaveAudio": "Save Audio (FLAC)", - "SaveAudioMP3": "Save Audio (MP3)", - "SaveAudioOpus": "Save Audio (Opus)", - "RecordAudio": "Record Audio", - "TrimAudioDuration": "Trim Audio Duration", - "SplitAudioChannels": "Split Audio Channels", - "AudioConcat": "Audio Concat", - "AudioMerge": "Audio Merge", - "AudioAdjustVolume": "Audio Adjust Volume", - "EmptyAudio": "Empty Audio", -} +async def comfy_entrypoint() -> AudioExtension: + return AudioExtension() From ecdc8697d53919a9178bf53ef327a110582db8ea Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 3 Dec 2025 19:49:28 -0800 Subject: [PATCH 3/5] Qwen Image Lora training fix from #11090 (#11094) --- comfy_extras/nodes_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy_extras/nodes_train.py b/comfy_extras/nodes_train.py index cb24ab709..19b8baaf4 100644 --- a/comfy_extras/nodes_train.py +++ b/comfy_extras/nodes_train.py @@ -623,7 +623,7 @@ class TrainLoraNode(io.ComfyNode): noise = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(seed) if multi_res: # use first latent as dummy latent if multi_res - latents = latents[0].repeat(num_images, 1, 1, 1) + latents = latents[0].repeat((num_images,) + ((1,) * (latents[0].ndim - 1))) guider.sample( noise.generate_noise({"samples": latents}), latents, From ea17add3c62197b10fd0b71d9169d339adc55c47 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 3 Dec 2025 20:15:15 -0800 Subject: [PATCH 4/5] Fix case where text encoders where running on the CPU instead of GPU. 
(#11095) --- comfy/sd.py | 2 ++ comfy/sd1_clip.py | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/comfy/sd.py b/comfy/sd.py index f9e5efab5..734bd2845 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -193,6 +193,7 @@ class CLIP: self.cond_stage_model.set_clip_options({"projected_pooled": False}) self.load_model() + self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device}) all_hooks.reset() self.patcher.patch_hooks(None) if show_pbar: @@ -240,6 +241,7 @@ class CLIP: self.cond_stage_model.set_clip_options({"projected_pooled": False}) self.load_model() + self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device}) o = self.cond_stage_model.encode_token_weights(tokens) cond, pooled = o[:2] if return_dict: diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py index 0fc9ab3db..503a51843 100644 --- a/comfy/sd1_clip.py +++ b/comfy/sd1_clip.py @@ -147,6 +147,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): self.layer_norm_hidden_state = layer_norm_hidden_state self.return_projected_pooled = return_projected_pooled self.return_attention_masks = return_attention_masks + self.execution_device = None if layer == "hidden": assert layer_idx is not None @@ -163,6 +164,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): def set_clip_options(self, options): layer_idx = options.get("layer", self.layer_idx) self.return_projected_pooled = options.get("projected_pooled", self.return_projected_pooled) + self.execution_device = options.get("execution_device", self.execution_device) if isinstance(self.layer, list) or self.layer == "all": pass elif layer_idx is None or abs(layer_idx) > self.num_layers: @@ -175,6 +177,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): self.layer = self.options_default[0] self.layer_idx = self.options_default[1] self.return_projected_pooled = self.options_default[2] + self.execution_device = None def process_tokens(self, tokens, device): end_token = self.special_tokens.get("end", None) @@ -258,7 +261,11 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): return torch.cat(embeds_out), torch.tensor(attention_masks, device=device, dtype=torch.long), num_tokens, embeds_info def forward(self, tokens): - device = self.transformer.get_input_embeddings().weight.device + if self.execution_device is None: + device = self.transformer.get_input_embeddings().weight.device + else: + device = self.execution_device + embeds, attention_mask, num_tokens, embeds_info = self.process_tokens(tokens, device) attention_mask_model = None From 6be85c7920224b45bbc6417e00147815e78c12a9 Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Thu, 4 Dec 2025 14:28:44 +1000 Subject: [PATCH 5/5] mp: use look-ahead actuals for stream offload VRAM calculation (#11096) TIL that the WAN TE has a 2GB weight followed by 16MB as the next size down. This means that a system with 8GB of VRAM would fully offload the TE in async offload mode, because the estimate just multiplied this giant size by the number of streams. Do the more complex logic of summing up the upcoming to-load weight sizes to avoid triple-counting this massive weight. Partial unload does the converse, recording the NS most recent unloads as they go.
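In other words, the per-module estimate moves from a worst-case multiple to a look-ahead sum over the actual upcoming weights. A simplified sketch of the idea (names abbreviated from the real model_patcher code, which also clamps the result with max(offload_buffer, ...)):

    # before: assume the next NUM_STREAMS in-flight weights are all as large as
    # the current module, so a single huge weight is counted several times over
    potential_offload = module_offload_mem + NUM_STREAMS * module_mem

    # after: sum the real sizes of the next NUM_STREAMS entries in the
    # largest-first loading list instead
    upcoming = sum(entry[1] for entry in loading[i + 1:i + 1 + NUM_STREAMS])
    potential_offload = module_offload_mem + upcoming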
--- comfy/model_patcher.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index df2d8e827..3dcac3eef 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -699,12 +699,12 @@ class ModelPatcher: offloaded = [] offload_buffer = 0 loading.sort(reverse=True) - for x in loading: + for i, x in enumerate(loading): module_offload_mem, module_mem, n, m, params = x lowvram_weight = False - potential_offload = max(offload_buffer, module_offload_mem + (comfy.model_management.NUM_STREAMS * module_mem)) + potential_offload = max(offload_buffer, module_offload_mem + sum([ x1[1] for x1 in loading[i+1:i+1+comfy.model_management.NUM_STREAMS]])) lowvram_fits = mem_counter + module_mem + potential_offload < lowvram_model_memory weight_key = "{}.weight".format(n) @@ -876,14 +876,18 @@ class ModelPatcher: patch_counter = 0 unload_list = self._load_list() unload_list.sort() + offload_buffer = self.model.model_offload_buffer_memory + if len(unload_list) > 0: + NS = comfy.model_management.NUM_STREAMS + offload_weight_factor = [ min(offload_buffer / (NS + 1), unload_list[0][1]) ] * NS for unload in unload_list: if memory_to_free + offload_buffer - self.model.model_offload_buffer_memory < memory_freed: break module_offload_mem, module_mem, n, m, params = unload - potential_offload = module_offload_mem + (comfy.model_management.NUM_STREAMS * module_mem) + potential_offload = module_offload_mem + sum(offload_weight_factor) lowvram_possible = hasattr(m, "comfy_cast_weights") if hasattr(m, "comfy_patched_weights") and m.comfy_patched_weights == True: @@ -935,6 +939,8 @@ class ModelPatcher: m.comfy_patched_weights = False memory_freed += module_mem offload_buffer = max(offload_buffer, potential_offload) + offload_weight_factor.append(module_mem) + offload_weight_factor.pop(0) logging.debug("freed {}".format(n)) for param in params: