From 389fb0323fa1fbe0328b8b421efb4ee8919644e1 Mon Sep 17 00:00:00 2001
From: sko00o
Date: Thu, 31 Jul 2025 14:59:28 +0800
Subject: [PATCH 1/2] update diffusers>=0.33.1

---
 embeddings.py    | 3 ++-
 requirements.txt | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/embeddings.py b/embeddings.py
index 111ba04..618f478 100644
--- a/embeddings.py
+++ b/embeddings.py
@@ -67,8 +67,9 @@ class CogVideoXPatchEmbed(nn.Module):
             post_time_compression_frames,
             self.spatial_interpolation_scale,
             self.temporal_interpolation_scale,
+            output_type="pt",
         )
-        pos_embedding = torch.from_numpy(pos_embedding).flatten(0, 1)
+        pos_embedding = pos_embedding.flatten(0, 1)
         joint_pos_embedding = torch.zeros(
             1, self.max_text_seq_length + num_patches, self.embed_dim, requires_grad=False
         )
diff --git a/requirements.txt b/requirements.txt
index 8ab8109..287e442 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 huggingface_hub
-diffusers>=0.31.0
+diffusers>=0.33.1
 accelerate>=0.33.0
 einops
 peft

From 881bbbf6c9d948e1bfacbabc8940a9201875dff1 Mon Sep 17 00:00:00 2001
From: sko00o
Date: Thu, 31 Jul 2025 15:01:48 +0800
Subject: [PATCH 2/2] Added validation for `max_size` parameter in `get_3d_rotary_pos_embed` function when `grid_type` is set to 'slice'.

---
 embeddings.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/embeddings.py b/embeddings.py
index 618f478..fb993d3 100644
--- a/embeddings.py
+++ b/embeddings.py
@@ -174,6 +174,8 @@ def get_3d_rotary_pos_embed(
         grid_t = np.arange(temporal_size, dtype=np.float32)
         grid_t = np.linspace(0, temporal_size, temporal_size, endpoint=False, dtype=np.float32)
     elif grid_type == "slice":
+        if max_size is None:
+            raise ValueError("`max_size` must be provided when `grid_type` is 'slice'")
         max_h, max_w = max_size
         grid_size_h, grid_size_w = grid_size
         grid_h = np.arange(max_h, dtype=np.float32)