Add smaller base resolutions (256–448) to the CogVideoX Fun sampler nodes

This commit is contained in:
kijai 2024-09-19 02:05:52 +03:00
parent 14515acd4a
commit 818e31d2d2
2 changed files with 14 additions and 4 deletions

View File

@ -242,6 +242,7 @@ class CogVideoTextEncode:
},
"optional": {
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
"force_offload": ("BOOLEAN", {"default": True}),
}
}
@ -250,7 +251,7 @@ class CogVideoTextEncode:
FUNCTION = "process"
CATEGORY = "CogVideoWrapper"
def process(self, clip, prompt, strength=1.0):
def process(self, clip, prompt, strength=1.0, force_offload=True):
load_device = mm.text_encoder_device()
offload_device = mm.text_encoder_offload_device()
clip.tokenizer.t5xxl.pad_to_max_length = True
@ -260,6 +261,7 @@ class CogVideoTextEncode:
embeds = clip.encode_from_tokens(tokens, return_pooled=False, return_dict=False)
embeds *= strength
if force_offload:
clip.cond_stage_model.to(offload_device)
return (embeds, )
@ -500,6 +502,10 @@ class CogVideoXFunSampler:
"video_length": ("INT", {"default": 49, "min": 5, "max": 49, "step": 4}),
"base_resolution": (
[
256,
320,
384,
448,
512,
768,
960,
@ -622,6 +628,10 @@ class CogVideoXFunVid2VidSampler:
"video_length": ("INT", {"default": 49, "min": 5, "max": 49, "step": 4}),
"base_resolution": (
[
256,
320,
384,
448,
512,
768,
960,

View File

@ -434,7 +434,7 @@ class CogVideoXPipeline(DiffusionPipeline):
num_frames,
height,
width,
prompt_embeds.dtype,
self.vae.dtype,
device,
generator,
timesteps,
@ -454,7 +454,7 @@ class CogVideoXPipeline(DiffusionPipeline):
height // self.vae_scale_factor_spatial,
width // self.vae_scale_factor_spatial,
)
latent_padding = torch.zeros(padding_shape, device=device, dtype=self.transformer.dtype)
latent_padding = torch.zeros(padding_shape, device=device, dtype=self.vae.dtype)
image_cond_latents = torch.cat([image_cond_latents, latent_padding], dim=1)
# 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline