From 0a121dba53d088b6d018577d3ce77292445ada3c Mon Sep 17 00:00:00 2001
From: kijai <40791699+kijai@users.noreply.github.com>
Date: Tue, 12 Nov 2024 07:39:00 +0200
Subject: [PATCH] fix FasterCache

---
 custom_cogvideox_transformer_3d.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/custom_cogvideox_transformer_3d.py b/custom_cogvideox_transformer_3d.py
index 10b9e4f..47b9488 100644
--- a/custom_cogvideox_transformer_3d.py
+++ b/custom_cogvideox_transformer_3d.py
@@ -630,11 +630,11 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
         # - However, for CogVideoX-5b-I2V also takes concatenated input image latents (number of input channels is twice the output channels)
 
         if p_t is None:
-            output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p)
+            output = hidden_states.reshape(1, num_frames, height // p, width // p, -1, p, p)
             output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4)
         else:
             output = hidden_states.reshape(
-                batch_size, (num_frames + p_t - 1) // p_t, height // p, width // p, -1, p_t, p, p
+                1, (num_frames + p_t - 1) // p_t, height // p, width // p, -1, p_t, p, p
             )
             output = output.permute(0, 1, 5, 4, 2, 6, 3, 7).flatten(6, 7).flatten(4, 5).flatten(1, 2)
 