From 573150de283d727ab7b904ddbc774cbb9e186db1 Mon Sep 17 00:00:00 2001 From: kijai <40791699+kijai@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:41:34 +0200 Subject: [PATCH] fix Tora when no autocast --- pipeline_cogvideox.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pipeline_cogvideox.py b/pipeline_cogvideox.py index c7e4545..59269b2 100644 --- a/pipeline_cogvideox.py +++ b/pipeline_cogvideox.py @@ -571,7 +571,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin): # raise ValueError(f"Tora trajectory length {trajectory_length} does not match inpaint_latents count {latents.shape[2]}") for module in self.transformer.fuser_list: for param in module.parameters(): - param.data = param.data.to(device) + param.data = param.data.to(self.vae_dtype).to(device) logger.info(f"Sampling {num_frames} frames in {latent_frames} latent frames at {width}x{height} with {num_inference_steps} inference steps") @@ -733,8 +733,6 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin): # broadcast to batch dimension in a way that's compatible with ONNX/Core ML timestep = t.expand(latent_model_input.shape[0]) - - if controlnet is not None: controlnet_states = None if (control_start <= current_step_percentage <= control_end): @@ -752,7 +750,6 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin): else: controlnet_states = controlnet_states.to(dtype=self.vae_dtype) - # predict noise model_output noise_pred = self.transformer( hidden_states=latent_model_input,