add smaller resolutions

kijai committed 2024-09-19 02:05:52 +03:00
commit 818e31d2d2, parent 14515acd4a
2 changed files with 14 additions and 4 deletions

@@ -242,6 +242,7 @@ class CogVideoTextEncode:
             },
             "optional": {
                 "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                "force_offload": ("BOOLEAN", {"default": True}),
             }
         }
@@ -250,7 +251,7 @@ class CogVideoTextEncode:
     FUNCTION = "process"
     CATEGORY = "CogVideoWrapper"

-    def process(self, clip, prompt, strength=1.0):
+    def process(self, clip, prompt, strength=1.0, force_offload=True):
         load_device = mm.text_encoder_device()
         offload_device = mm.text_encoder_offload_device()
         clip.tokenizer.t5xxl.pad_to_max_length = True
@@ -260,7 +261,8 @@ class CogVideoTextEncode:
         embeds = clip.encode_from_tokens(tokens, return_pooled=False, return_dict=False)
         embeds *= strength
-        clip.cond_stage_model.to(offload_device)
+        if force_offload:
+            clip.cond_stage_model.to(offload_device)

         return (embeds, )
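
The practical effect, as a minimal standalone sketch (the encode() wrapper name is mine; mm is ComfyUI's comfy.model_management, whose text_encoder_device()/text_encoder_offload_device() helpers the node already uses):

    import comfy.model_management as mm

    def encode(clip, prompt, strength=1.0, force_offload=True):
        load_device = mm.text_encoder_device()
        offload_device = mm.text_encoder_offload_device()
        clip.cond_stage_model.to(load_device)  # ensure T5 sits on the compute device
        tokens = clip.tokenize(prompt)
        embeds = clip.encode_from_tokens(tokens, return_pooled=False, return_dict=False)
        embeds *= strength
        if force_offload:                          # new: offloading is now opt-out
            clip.cond_stage_model.to(offload_device)  # free VRAM after encoding
        return (embeds,)

With force_offload left at its default of True the behavior is unchanged; setting it to False keeps the T5 encoder resident on the load device, spending VRAM to avoid reloading it on every prompt change.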
@@ -500,6 +502,10 @@ class CogVideoXFunSampler:
                 "video_length": ("INT", {"default": 49, "min": 5, "max": 49, "step": 4}),
                 "base_resolution": (
                     [
+                        256,
+                        320,
+                        384,
+                        448,
                         512,
                         768,
                         960,
@@ -622,6 +628,10 @@ class CogVideoXFunVid2VidSampler:
                 "video_length": ("INT", {"default": 49, "min": 5, "max": 49, "step": 4}),
                 "base_resolution": (
                     [
+                        256,
+                        320,
+                        384,
+                        448,
                         512,
                         768,
                         960,
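
Both Fun samplers gain the same four entries. If base_resolution sets the pixel budget of the output bucket, the new 256-448 options are aimed at low-VRAM setups; a rough sketch of area-preserving sizing under that assumption (size_for() is a hypothetical helper, not the wrapper's actual sizing code):

    import math

    def size_for(base_resolution: int, aspect_ratio: float, multiple: int = 16):
        # Pick (height, width) with area close to base_resolution**2 at the
        # requested aspect ratio, snapped to a VAE-friendly multiple.
        height = math.sqrt(base_resolution ** 2 / aspect_ratio)
        width = height * aspect_ratio
        snap = lambda v: max(multiple, round(v / multiple) * multiple)
        return snap(height), snap(width)

    print(size_for(256, 16 / 9))  # (192, 336) -- roughly 4x fewer pixels than 512
    print(size_for(512, 16 / 9))  # (384, 688)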

@@ -434,7 +434,7 @@ class CogVideoXPipeline(DiffusionPipeline):
             num_frames,
             height,
             width,
-            prompt_embeds.dtype,
+            self.vae.dtype,
             device,
             generator,
             timesteps,
@@ -454,7 +454,7 @@ class CogVideoXPipeline(DiffusionPipeline):
                 height // self.vae_scale_factor_spatial,
                 width // self.vae_scale_factor_spatial,
             )
-            latent_padding = torch.zeros(padding_shape, device=device, dtype=self.transformer.dtype)
+            latent_padding = torch.zeros(padding_shape, device=device, dtype=self.vae.dtype)
             image_cond_latents = torch.cat([image_cond_latents, latent_padding], dim=1)

             # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
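
Both pipeline hunks make latent preparation follow self.vae.dtype instead of prompt_embeds.dtype / self.transformer.dtype, presumably so the image-conditioning latents coming out of the VAE and the zero padding concatenated onto them share one dtype. A toy illustration of the failure mode the second hunk avoids (shapes are invented; only the dtype handling mirrors the pipeline):

    import torch

    vae_dtype = torch.float16           # dtype of latents the VAE produced
    transformer_dtype = torch.bfloat16  # dtype the old code used for padding

    image_cond_latents = torch.randn(1, 2, 16, 60, 90, dtype=vae_dtype)
    padding_shape = (1, 11, 16, 60, 90)

    # Old: padding in the transformer's dtype; cat() then promotes the
    # whole tensor away from the VAE dtype (fp16 + bf16 -> fp32).
    bad = torch.cat([image_cond_latents,
                     torch.zeros(padding_shape, dtype=transformer_dtype)], dim=1)

    # New: padding matches the VAE dtype, so the result stays fp16.
    good = torch.cat([image_cond_latents,
                      torch.zeros(padding_shape, dtype=vae_dtype)], dim=1)
    print(bad.dtype, good.dtype)  # torch.float32 torch.float16

Under PyTorch's type promotion rules, concatenating fp16 with bf16 silently upcasts the result to fp32, which both wastes memory and hands downstream code a dtype it did not expect.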