some cleanup

2026-06-01 01:07:06 +08:00 · 2024-05-02 16:53:02 +03:00 · 2024-05-02 16:53:02 +03:00 · d31a7ce2a1
commit d31a7ce2a1
parent 500f2cc20b
4 changed files with 131 additions and 283 deletions
--- a/init.py
+++ b/init.py
@ -3,6 +3,7 @@ from .nodes.curve_nodes import *
 from .nodes.batchcrop_nodes import *
 from .nodes.audioscheduler_nodes import *
 from .nodes.image_nodes import *
 from .nodes.intrinsic_lora_nodes import *
 NODE_CLASS_MAPPINGS = {
    #constants
    "INTConstant": INTConstant,
@ -106,7 +107,6 @@ NODE_CLASS_MAPPINGS = {
    "SV3D_BatchSchedule": SV3D_BatchSchedule,
    "LoadResAdapterNormalization": LoadResAdapterNormalization,
    "Superprompt": Superprompt,
    "GLIGENTextBoxApplyBatch": GLIGENTextBoxApplyBatch,
    "GLIGENTextBoxApplyBatchCoords": GLIGENTextBoxApplyBatchCoords,
    "Intrinsic_lora_sampling": Intrinsic_lora_sampling,
@ -175,7 +175,6 @@ NODE_DISPLAY_NAME_MAPPINGS = {
    "ImageTransformByNormalizedAmplitude": "ImageTransformByNormalizedAmplitude",
    "GetLatentsFromBatchIndexed": "GetLatentsFromBatchIndexed",
    "StringConstant": "StringConstant",
    "GLIGENTextBoxApplyBatch": "GLIGENTextBoxApplyBatch (deprecated)",
    "CondPassThrough": "CondPassThrough",
    "ImageUpscaleWithModelBatched": "ImageUpscaleWithModelBatched",
    "ScaleBatchPromptSchedule": "ScaleBatchPromptSchedule",
--- a/nodes/image_nodes.py
+++ b/nodes/image_nodes.py
@ -660,12 +660,12 @@ class ImagePadForOutpaintMasked:
        if mask is None:
            new_mask = torch.ones(
-                (H + top + bottom, W + left + right),
+                (B, H + top + bottom, W + left + right),
                dtype=torch.float32,
            )
            t = torch.zeros(
-            (H, W),
+            (B, H, W),
            dtype=torch.float32
            )
        else:
@ -673,8 +673,6 @@ class ImagePadForOutpaintMasked:
            mask = F.pad(mask, (left, right, top, bottom), mode='constant', value=0)
            mask = 1 - mask
            t = torch.zeros_like(mask)
        if feathering > 0 and feathering * 2 < H and feathering * 2 < W:
@ -694,14 +692,14 @@ class ImagePadForOutpaintMasked:
                    v = (feathering - d) / feathering
                    if mask is None:
-                        t[i, j] = v * v
+                        t[:, i, j] = v * v
                    else:
                        t[:, top + i, left + j] = v * v
        if mask is None:
            mask = new_mask.squeeze(0)
-            mask[top:top + H, left:left + W] = t
+            mask[:, top:top + H, left:left + W] = t
-            mask = mask.unsqueeze(0)
+            #mask = mask.unsqueeze(0)
        return (new_image, mask,)
--- a/nodes/intrinsic_lora_nodes.py
+++ b/nodes/intrinsic_lora_nodes.py
@ -0,0 +1,115 @@
 import folder_paths
 import os
 import torch
 import torch.nn.functional as F
 from comfy.utils import ProgressBar
 import comfy.sample
 from nodes import CLIPTextEncode
 script_directory = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 folder_paths.add_model_folder_path("intristic_loras", os.path.join(script_directory, "intristic_loras"))
 class Intrinsic_lora_sampling:
    def __init__(self):
        self.loaded_lora = None
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "model": ("MODEL",),
                "lora_name": (folder_paths.get_filename_list("intristic_loras"), ),
                "task": (
                [   
                    'depth map',
                    'surface normals',
                    'albedo',
                    'shading',
                ],
                {
                "default": 'depth map'
                    }),
                "text": ("STRING", {"multiline": True, "default": ""}),
                "clip": ("CLIP", ),
                "vae": ("VAE", ),
                "per_batch": ("INT", {"default": 16, "min": 1, "max": 4096, "step": 1}),
        },
            "optional": {
            "image": ("IMAGE",),
            "optional_latent": ("LATENT",),
            },
        }
    RETURN_TYPES = ("IMAGE", "LATENT",)
    FUNCTION = "onestepsample"
    CATEGORY = "KJNodes"
    DESCRIPTION = """
 Sampler to use the intrinsic loras:  
 https://github.com/duxiaodan/intrinsic-lora  
 These LoRAs are tiny and thus included  
 with this node pack.
 """
    def onestepsample(self, model, lora_name, clip, vae, text, task, per_batch, image=None, optional_latent=None):
        pbar = ProgressBar(3)
        if optional_latent is None:
            image_list = []
            for start_idx in range(0, image.shape[0], per_batch):
                sub_pixels = vae.vae_encode_crop_pixels(image[start_idx:start_idx+per_batch])
                image_list.append(vae.encode(sub_pixels[:,:,:,:3]))
            sample = torch.cat(image_list, dim=0)
        else:
            sample = optional_latent["samples"]
        noise = torch.zeros(sample.size(), dtype=sample.dtype, layout=sample.layout, device="cpu")
        prompt = task + "," + text
        positive, = CLIPTextEncode.encode(self, clip, prompt)
        negative = positive #negative shouldn't do anything in this scenario
        pbar.update(1)
        #custom model sampling to pass latent through as it is
        class X0_PassThrough(comfy.model_sampling.EPS):
            def calculate_denoised(self, sigma, model_output, model_input):
                return model_output
            def calculate_input(self, sigma, noise):
                return noise
        sampling_base = comfy.model_sampling.ModelSamplingDiscrete
        sampling_type = X0_PassThrough
        class ModelSamplingAdvanced(sampling_base, sampling_type):
            pass
        model_sampling = ModelSamplingAdvanced(model.model.model_config)
        #load lora
        model_clone = model.clone()
        lora_path = folder_paths.get_full_path("intristic_loras", lora_name)        
        lora = comfy.utils.load_torch_file(lora_path, safe_load=True)
        self.loaded_lora = (lora_path, lora)
        model_clone_with_lora = comfy.sd.load_lora_for_models(model_clone, None, lora, 1.0, 0)[0]
        model_clone_with_lora.add_object_patch("model_sampling", model_sampling)
        samples = {"samples": comfy.sample.sample(model_clone_with_lora, noise, 1, 1.0, "euler", "simple", positive, negative, sample,
                                  denoise=1.0, disable_noise=True, start_step=0, last_step=1,
                                  force_full_denoise=True, noise_mask=None, callback=None, disable_pbar=True, seed=None)}
        pbar.update(1)
        decoded = []
        for start_idx in range(0, samples["samples"].shape[0], per_batch):
            decoded.append(vae.decode(samples["samples"][start_idx:start_idx+per_batch]))
        image_out = torch.cat(decoded, dim=0)
        pbar.update(1)
        if task == 'depth map':
            imax = image_out.max()
            imin = image_out.min()
            image_out = (image_out-imin)/(imax-imin)
            image_out = torch.max(image_out, dim=3, keepdim=True)[0].repeat(1, 1, 1, 3)
        elif task == 'surface normals':
            image_out = F.normalize(image_out * 2 - 1, dim=3) / 2 + 0.5
            image_out = 1.0 - image_out
        else:
            image_out = image_out.clamp(-1.,1.)
        return (image_out, samples,)
--- a/nodes/nodes.py
+++ b/nodes/nodes.py
@ -14,9 +14,8 @@ import os
 import io
 import model_management
-from nodes import MAX_RESOLUTION, CLIPTextEncode
+from nodes import MAX_RESOLUTION
 import comfy.sample
 import folder_paths
 from ..utility.utility import tensor2pil, pil2tensor
@ -178,12 +177,7 @@ class CreateFluidMask:
        return (torch.cat(out, dim=0),torch.cat(masks, dim=0),)
 class CreateAudioMask:
-    def __init__(self):
+       
        try:
            import librosa
            self.librosa = librosa
        except ImportError:
            print("Can not import librosa. Install it with 'pip install librosa'")
    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "createaudiomask"
    CATEGORY = "KJNodes/deprecated"
@ -202,14 +196,17 @@ class CreateAudioMask:
    } 
    def createaudiomask(self, frames, width, height, invert, audio_path, scale):
-             # Define the number of images in the batch
+        try:
            import librosa
        except ImportError:
            raise Exception("Can not import librosa. Install it with 'pip install librosa'")
        batch_size = frames
        out = []
        masks = []
        if audio_path == "audio.wav": #I don't know why relative path won't work otherwise...
            audio_path = os.path.join(script_directory, audio_path)
-        audio, sr = self.librosa.load(audio_path)
+        audio, sr = librosa.load(audio_path)
-        spectrogram = np.abs(self.librosa.stft(audio))
+        spectrogram = np.abs(librosa.stft(audio))
        for i in range(batch_size):
           image = Image.new("RGB", (width, height), "black")
@ -2432,265 +2429,6 @@ https://huggingface.co/stabilityai/sv3d
        latent = torch.zeros([batch_size, 4, height // 8, width // 8])
        return (final_positive, final_negative, {"samples": latent})
 def parse_coordinates(coordinates_str):
    coordinates = {}
    pattern = r'(\d+):\((\d+),(\d+)\)'
    matches = re.findall(pattern, coordinates_str)
    for match in matches:
        index, x, y = map(int, match)
        coordinates[index] = (x, y)
    return coordinates
 def interpolate_coordinates(coordinates_dict, batch_size):
    sorted_coords = sorted(coordinates_dict.items())
    interpolated = {}
    for i, ((index1, (x1, y1)), (index2, (x2, y2))) in enumerate(zip(sorted_coords, sorted_coords[1:])):
        distance = index2 - index1
        x_step = (x2 - x1) / distance
        y_step = (y2 - y1) / distance
        for j in range(distance):
            interpolated_x = round(x1 + j * x_step)
            interpolated_y = round(y1 + j * y_step)
            interpolated[index1 + j] = (interpolated_x, interpolated_y)
    interpolated[sorted_coords[-1][0]] = sorted_coords[-1][1]
    # Ensure we have coordinates for all indices in the batch
    last_index, last_coords = sorted_coords[-1]
    for i in range(last_index + 1, batch_size):
        interpolated[i] = last_coords
    return interpolated
 def interpolate_coordinates_with_curves(coordinates_dict, batch_size):
    from scipy.interpolate import CubicSpline
    sorted_coords = sorted(coordinates_dict.items())
    x_coords, y_coords = zip(*[coord for index, coord in sorted_coords])
    # Create the spline curve functions
    indices = np.array([index for index, coord in sorted_coords])
    cs_x = CubicSpline(indices, x_coords)
    cs_y = CubicSpline(indices, y_coords)
    # Generate interpolated coordinates using the spline functions
    interpolated_indices = np.arange(0, batch_size)
    interpolated_x = cs_x(interpolated_indices)
    interpolated_y = cs_y(interpolated_indices)
    # Round the interpolated coordinates and create the dictionary
    interpolated = {i: (round(x), round(y)) for i, (x, y) in enumerate(zip(interpolated_x, interpolated_y))}
    return interpolated
 def plot_to_tensor(coordinates_dict, interpolated_dict, height, width, box_size):
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
    import matplotlib.patches as patches
    original_x, original_y = zip(*coordinates_dict.values())
    interpolated_x, interpolated_y = zip(*interpolated_dict.values())
    fig, ax = plt.subplots(figsize=(width/100, height/100), dpi=100)
    ax.scatter(original_x, original_y, color='blue', label='Original Points')
    ax.scatter(interpolated_x, interpolated_y, color='red', alpha=0.5, label='Interpolated Points')
    ax.plot(interpolated_x, interpolated_y, color='grey', linestyle='--', linewidth=0.5)
    # Draw a box at each interpolated coordinate
    for x, y in interpolated_dict.values():
        rect = patches.Rectangle((x - box_size/2, y - box_size/2), box_size, box_size,
                                 linewidth=1, edgecolor='green', facecolor='none')
        ax.add_patch(rect)
    ax.set_title('Interpolated Coordinates')
    ax.set_xlabel('X Coordinate')
    ax.set_ylabel('Y Coordinate')
    ax.legend()
    ax.set_xlim(0, width)  # Set the x-axis to match the input latent width
    ax.set_ylim(height, 0)  # Set the y-axis to match the input latent height, with (0,0) at top-left
    canvas = FigureCanvas(fig)
    canvas.draw()
    width, height = fig.get_size_inches() * fig.get_dpi()
    image_np = np.frombuffer(canvas.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)
    image_tensor = torch.from_numpy(image_np).float() / 255.0
    image_tensor = image_tensor.unsqueeze(0)
    plt.close(fig)
    return image_tensor
 class GLIGENTextBoxApplyBatch:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning_to": ("CONDITIONING", ),
                              "latents": ("LATENT", ),
                              "clip": ("CLIP", ),
                              "gligen_textbox_model": ("GLIGEN", ),
                              "text": ("STRING", {"multiline": True}),
                              "width": ("INT", {"default": 64, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
                              "height": ("INT", {"default": 64, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
                              "coordinates": ("STRING", {"multiline": True}),
                              "interpolation": (
                                [   
                                    'straight',
                                    'CubicSpline',
                                ],
                                {
                                "default": 'CubicSpline'
                                 }),
                             }}
    RETURN_TYPES = ("CONDITIONING", "IMAGE",)
    FUNCTION = "append"
    CATEGORY = "KJNodes/experimental"
    DESCRIPTION = """
 Experimental, deprecated, check the GLIGENTextBoxApplyBatchCoords instead.
 """
    def append(self, latents, conditioning_to, clip, gligen_textbox_model, text, width, height, coordinates, interpolation):
        coordinates_dict = parse_coordinates(coordinates)
        batch_size = sum(tensor.size(0) for tensor in latents.values())
        c = []
        cond, cond_pooled = clip.encode_from_tokens(clip.tokenize(text), return_pooled=True)
        # Interpolate coordinates for the entire batch
        if interpolation == 'CubicSpline':
            interpolated_coords = interpolate_coordinates_with_curves(coordinates_dict, batch_size)
        if interpolation == 'straight':
            interpolated_coords = interpolate_coordinates(coordinates_dict, batch_size)
        plot_image_tensor = plot_to_tensor(coordinates_dict, interpolated_coords, 512, 512, height)
        for t in conditioning_to:
            n = [t[0], t[1].copy()]
            position_params_batch = [[] for _ in range(batch_size)]  # Initialize a list of empty lists for each batch item
            for i in range(batch_size):
                x_position, y_position = interpolated_coords[i] 
                position_param = (cond_pooled, height // 8, width // 8, y_position // 8, x_position // 8)
                position_params_batch[i].append(position_param)  # Append position_param to the correct sublist
                print("x ",x_position, "y ", y_position)
            prev = []
            if "gligen" in n[1]:
                prev = n[1]['gligen'][2]
            else:
                prev = [[] for _ in range(batch_size)]
            # Concatenate prev and position_params_batch, ensuring both are lists of lists
            # and each sublist corresponds to a batch item
            combined_position_params = [prev_item + batch_item for prev_item, batch_item in zip(prev, position_params_batch)]
            n[1]['gligen'] = ("position_batched", gligen_textbox_model, combined_position_params)
            c.append(n)
        return (c, plot_image_tensor,)
 folder_paths.add_model_folder_path("intristic_loras", os.path.join(script_directory, "intristic_loras"))
 class Intrinsic_lora_sampling:
    def __init__(self):
        self.loaded_lora = None
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "model": ("MODEL",),
                "lora_name": (folder_paths.get_filename_list("intristic_loras"), ),
                "task": (
                [   
                    'depth map',
                    'surface normals',
                    'albedo',
                    'shading',
                ],
                {
                "default": 'depth map'
                    }),
                "text": ("STRING", {"multiline": True, "default": ""}),
                "clip": ("CLIP", ),
                "vae": ("VAE", ),
                "per_batch": ("INT", {"default": 16, "min": 1, "max": 4096, "step": 1}),
        },
            "optional": {
            "image": ("IMAGE",),
            "optional_latent": ("LATENT",),
            },
        }
    RETURN_TYPES = ("IMAGE", "LATENT",)
    FUNCTION = "onestepsample"
    CATEGORY = "KJNodes"
    DESCRIPTION = """
 Sampler to use the intrinsic loras:  
 https://github.com/duxiaodan/intrinsic-lora  
 These LoRAs are tiny and thus included  
 with this node pack.
 """
    def onestepsample(self, model, lora_name, clip, vae, text, task, per_batch, image=None, optional_latent=None):
        pbar = comfy.utils.ProgressBar(3)
        if optional_latent is None:
            image_list = []
            for start_idx in range(0, image.shape[0], per_batch):
                sub_pixels = vae.vae_encode_crop_pixels(image[start_idx:start_idx+per_batch])
                image_list.append(vae.encode(sub_pixels[:,:,:,:3]))
            sample = torch.cat(image_list, dim=0)
        else:
            sample = optional_latent["samples"]
        noise = torch.zeros(sample.size(), dtype=sample.dtype, layout=sample.layout, device="cpu")
        prompt = task + "," + text
        positive, = CLIPTextEncode.encode(self, clip, prompt)
        negative = positive #negative shouldn't do anything in this scenario
        pbar.update(1)
        #custom model sampling to pass latent through as it is
        class X0_PassThrough(comfy.model_sampling.EPS):
            def calculate_denoised(self, sigma, model_output, model_input):
                return model_output
            def calculate_input(self, sigma, noise):
                return noise
        sampling_base = comfy.model_sampling.ModelSamplingDiscrete
        sampling_type = X0_PassThrough
        class ModelSamplingAdvanced(sampling_base, sampling_type):
            pass
        model_sampling = ModelSamplingAdvanced(model.model.model_config)
        #load lora
        model_clone = model.clone()
        lora_path = folder_paths.get_full_path("intristic_loras", lora_name)        
        lora = comfy.utils.load_torch_file(lora_path, safe_load=True)
        self.loaded_lora = (lora_path, lora)
        model_clone_with_lora = comfy.sd.load_lora_for_models(model_clone, None, lora, 1.0, 0)[0]
        model_clone_with_lora.add_object_patch("model_sampling", model_sampling)
        samples = {"samples": comfy.sample.sample(model_clone_with_lora, noise, 1, 1.0, "euler", "simple", positive, negative, sample,
                                  denoise=1.0, disable_noise=True, start_step=0, last_step=1,
                                  force_full_denoise=True, noise_mask=None, callback=None, disable_pbar=True, seed=None)}
        pbar.update(1)
        decoded = []
        for start_idx in range(0, samples["samples"].shape[0], per_batch):
            decoded.append(vae.decode(samples["samples"][start_idx:start_idx+per_batch]))
        image_out = torch.cat(decoded, dim=0)
        pbar.update(1)
        if task == 'depth map':
            imax = image_out.max()
            imin = image_out.min()
            image_out = (image_out-imin)/(imax-imin)
            image_out = torch.max(image_out, dim=3, keepdim=True)[0].repeat(1, 1, 1, 3)
        elif task == 'surface normals':
            image_out = F.normalize(image_out * 2 - 1, dim=3) / 2 + 0.5
            image_out = 1.0 - image_out
        else:
            image_out = image_out.clamp(-1.,1.)
        return (image_out, samples,)
 class RemapMaskRange:
    @classmethod
@ -2842,7 +2580,6 @@ or a .txt file with RealEstate camera intrinsics and coordinates, in a 3D plot.
    def plot(self, pose_file_path, scale, base_xval, zval, use_exact_fx, relative_c2w, use_viewer, cameractrl_poses=None):
        import matplotlib as mpl
        import matplotlib.pyplot as plt
        import io
        from torchvision.transforms import ToTensor
        x_min = -2.0 * scale
@ -3052,7 +2789,6 @@ If no image is provided, mode is set to text-to-image
            args.disable_metadata = False
        import requests
        from io import BytesIO
        from torchvision import transforms
        data = {
@ -3068,7 +2804,7 @@ If no image is provided, mode is set to text-to-image
            to_pil = transforms.ToPILImage()
            pil_image = to_pil(image)
            # Save the PIL Image to a BytesIO object
-            buffer = BytesIO()
+            buffer = io.BytesIO()
            pil_image.save(buffer, format='PNG')
            buffer.seek(0)
            files = {"image": ("image.png", buffer, "image/png")}
@ -3105,7 +2841,7 @@ If no image is provided, mode is set to text-to-image
        if response.status_code == 200:
            # Convert the response content to a PIL Image
-            image = Image.open(BytesIO(response.content))
+            image = Image.open(io.BytesIO(response.content))
            # Convert the PIL Image to a PyTorch tensor
            transform = transforms.ToTensor()
            tensor_image = transform(image)