diff --git a/embeddings.py b/embeddings.py index 111ba04..618f478 100644 --- a/embeddings.py +++ b/embeddings.py @@ -67,8 +67,9 @@ class CogVideoXPatchEmbed(nn.Module): post_time_compression_frames, self.spatial_interpolation_scale, self.temporal_interpolation_scale, + output_type="pt", ) - pos_embedding = torch.from_numpy(pos_embedding).flatten(0, 1) + pos_embedding = pos_embedding.flatten(0, 1) joint_pos_embedding = torch.zeros( 1, self.max_text_seq_length + num_patches, self.embed_dim, requires_grad=False ) diff --git a/requirements.txt b/requirements.txt index 8ab8109..287e442 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ huggingface_hub -diffusers>=0.31.0 +diffusers>=0.33.1 accelerate>=0.33.0 einops peft