From d61894be21ba2f035c0192db5d31549a0bcfd479 Mon Sep 17 00:00:00 2001 From: kijai <40791699+kijai@users.noreply.github.com> Date: Thu, 2 May 2024 02:34:57 +0300 Subject: [PATCH] Gligen node updates --- __init__.py | 2 +- nodes/curve_nodes.py | 73 +++++++++++++++++++++++++++++++------------- nodes/nodes.py | 6 ++-- 3 files changed, 54 insertions(+), 27 deletions(-) diff --git a/__init__.py b/__init__.py index bc0f998..0cac1c9 100644 --- a/__init__.py +++ b/__init__.py @@ -175,7 +175,7 @@ NODE_DISPLAY_NAME_MAPPINGS = { "ImageTransformByNormalizedAmplitude": "ImageTransformByNormalizedAmplitude", "GetLatentsFromBatchIndexed": "GetLatentsFromBatchIndexed", "StringConstant": "StringConstant", - "GLIGENTextBoxApplyBatch": "GLIGENTextBoxApplyBatch", + "GLIGENTextBoxApplyBatch": "GLIGENTextBoxApplyBatch (deprecated)", "CondPassThrough": "CondPassThrough", "ImageUpscaleWithModelBatched": "ImageUpscaleWithModelBatched", "ScaleBatchPromptSchedule": "ScaleBatchPromptSchedule", diff --git a/nodes/curve_nodes.py b/nodes/curve_nodes.py index ae1ca6c..d44ca32 100644 --- a/nodes/curve_nodes.py +++ b/nodes/curve_nodes.py @@ -497,38 +497,61 @@ class GLIGENTextBoxApplyBatchCoords: "gligen_textbox_model": ("GLIGEN", ), "coordinates": ("STRING", {"forceInput": True}), "text": ("STRING", {"multiline": True}), - "width": ("INT", {"default": 64, "min": 8, "max": 4096, "step": 8}), - "height": ("INT", {"default": 64, "min": 8, "max": 4096, "step": 8}), + "width": ("INT", {"default": 128, "min": 8, "max": 4096, "step": 8}), + "height": ("INT", {"default": 128, "min": 8, "max": 4096, "step": 8}), }, + "optional": {"size_multiplier": ("FLOAT", {"default": [1.0], "forceInput": True})}, } RETURN_TYPES = ("CONDITIONING", "IMAGE", ) + RETURN_NAMES = ("conditioning", "coord_preview", ) FUNCTION = "append" CATEGORY = "KJNodes/experimental" DESCRIPTION = """ -Experimental, does not function yet as ComfyUI base changes are needed +This node allows scheduling GLIGEN text box positions in a batch, +to be used with AnimateDiff-Evolved. Intended to pair with the +Spline Editor -node. + +GLIGEN model can be downloaded through the Manage's "Install Models" menu. +Or directly from here: +https://huggingface.co/comfyanonymous/GLIGEN_pruned_safetensors/tree/main + +Inputs: +- **latents** input is used to calculate batch size +- **clip** is your standard text encoder, use same as for the main prompt +- **gligen_textbox_model** connects to GLIGEN Loader +- **coordinates** takes a json string of points, directly compatible +with the spline editor node. +- **text** is the part of the prompt to set position for +- **width** and **height** are the size of the GLIGEN bounding box + +Outputs: +- **conditioning** goes between to clip text encode and the sampler +- **coord_preview** is an optional preview of the coordinates and +bounding boxes. + """ - def append(self, latents, coordinates, conditioning_to, clip, gligen_textbox_model, text, width, height): + def append(self, latents, coordinates, conditioning_to, clip, gligen_textbox_model, text, width, height, size_multiplier=[1.0]): coordinates = json.loads(coordinates.replace("'", '"')) coordinates = [(coord['x'], coord['y']) for coord in coordinates] batch_size = sum(tensor.size(0) for tensor in latents.values()) - assert len(coordinates) == batch_size, "The number of coordinates does not match the number of latents" - c = [] - cond, cond_pooled = clip.encode_from_tokens(clip.tokenize(text), return_pooled=True) + if len(coordinates) != batch_size: + print("GLIGENTextBoxApplyBatchCoords WARNING: The number of coordinates does not match the number of latents") - image_height = latents['samples'].shape[-2] * 8 - image_width = latents['samples'].shape[-1] * 8 - plot_image_tensor = self.plot_coordinates_to_tensor(coordinates, image_height, image_width, height, text) + c = [] + _, cond_pooled = clip.encode_from_tokens(clip.tokenize(text), return_pooled=True) for t in conditioning_to: n = [t[0], t[1].copy()] position_params_batch = [[] for _ in range(batch_size)] # Initialize a list of empty lists for each batch item - + if len(size_multiplier) != batch_size: + size_multiplier = size_multiplier * (batch_size // len(size_multiplier)) + size_multiplier[:batch_size % len(size_multiplier)] + for i in range(batch_size): - x_position, y_position = coordinates[i] - position_param = (cond_pooled, height // 8, width // 8, y_position // 8, x_position // 8) + x_position, y_position = coordinates[i] + position_param = (cond_pooled, int((height // 8) * size_multiplier[i]), int((width // 8) * size_multiplier[i]), y_position // 8, x_position // 8) position_params_batch[i].append(position_param) # Append position_param to the correct sublist prev = [] @@ -541,35 +564,41 @@ Experimental, does not function yet as ComfyUI base changes are needed combined_position_params = [prev_item + batch_item for prev_item, batch_item in zip(prev, position_params_batch)] n[1]['gligen'] = ("position_batched", gligen_textbox_model, combined_position_params) c.append(n) + + image_height = latents['samples'].shape[-2] * 8 + image_width = latents['samples'].shape[-1] * 8 + plot_image_tensor = self.plot_coordinates_to_tensor(coordinates, image_height, image_width, height, size_multiplier, text) return (c, plot_image_tensor,) - def plot_coordinates_to_tensor(self, coordinates, height, width, box_size, prompt): + def plot_coordinates_to_tensor(self, coordinates, height, width, bbox_height, size_multiplier, prompt): import matplotlib matplotlib.use('Agg') from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas - # Convert coordinates to separate x and y lists - #x_coords, y_coords = zip(*coordinates) - fig, ax = matplotlib.pyplot.subplots(figsize=(width/100, height/100), dpi=100) - #ax.scatter(x_coords, y_coords, color='yellow', label='_nolegend_') + cmap = matplotlib.pyplot.get_cmap('rainbow') # Draw a box at each coordinate - for x, y in coordinates: + for i, ((x, y), size) in enumerate(zip(coordinates, size_multiplier)): + color_index = i / (len(coordinates) - 1) + color = cmap(color_index) + box_size = bbox_height * size rect = matplotlib.patches.Rectangle((x - box_size/2, y - box_size/2), box_size, box_size, - linewidth=1, edgecolor='green', facecolor='none', alpha=0.5) + linewidth=1, edgecolor=color, facecolor='none', alpha=0.5) ax.add_patch(rect) - # Draw arrows from one point to another to indicate direction + # Draw arrows from one point to another to indicate direction for i in range(len(coordinates) - 1): + color_index = i / (len(coordinates) - 1) + color = cmap(color_index) x1, y1 = coordinates[i] x2, y2 = coordinates[i + 1] ax.annotate("", xy=(x2, y2), xytext=(x1, y1), arrowprops=dict(arrowstyle="->", linestyle="-", lw=1, - color='orange', + color=color, mutation_scale=10)) matplotlib.pyplot.rcParams['text.color'] = '#999999' fig.patch.set_facecolor('#353535') diff --git a/nodes/nodes.py b/nodes/nodes.py index 1e3868f..4c1b815 100644 --- a/nodes/nodes.py +++ b/nodes/nodes.py @@ -2545,7 +2545,7 @@ class GLIGENTextBoxApplyBatch: FUNCTION = "append" CATEGORY = "KJNodes/experimental" DESCRIPTION = """ -Experimental, does not function yet as ComfyUI base changes are needed +Experimental, deprecated, check the GLIGENTextBoxApplyBatchCoords instead. """ def append(self, latents, conditioning_to, clip, gligen_textbox_model, text, width, height, coordinates, interpolation): @@ -2580,13 +2580,11 @@ Experimental, does not function yet as ComfyUI base changes are needed # Concatenate prev and position_params_batch, ensuring both are lists of lists # and each sublist corresponds to a batch item combined_position_params = [prev_item + batch_item for prev_item, batch_item in zip(prev, position_params_batch)] - n[1]['gligen'] = ("position", gligen_textbox_model, combined_position_params) + n[1]['gligen'] = ("position_batched", gligen_textbox_model, combined_position_params) c.append(n) return (c, plot_image_tensor,) - - folder_paths.add_model_folder_path("intristic_loras", os.path.join(script_directory, "intristic_loras")) class Intrinsic_lora_sampling: