Merge pull request #261 from Dango233/Dango233-patch-1

Fix fused sdpa
This commit is contained in:
Jukka Seppänen 2024-11-20 12:37:26 +02:00 committed by GitHub
commit b9f7b6e338
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -128,7 +128,7 @@ class CogVideoXAttnProcessor2_0:
         if attention_mode == "sageattn" or attention_mode == "fused_sageattn":
             hidden_states = sageattn_func(query, key, value, attn_mask=attention_mask, dropout_p=0.0,is_causal=False)
             hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
-        elif attention_mode == "sdpa":
+        elif attention_mode == "sdpa" or attention_mode == "fused_sdpa":
             hidden_states = F.scaled_dot_product_attention(
                 query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
             )
@@ -751,4 +751,4 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
         if not return_dict:
             return (output,)
         return Transformer2DModelOutput(sample=output)