support HunyuanVideo

LiewFeng 2024-12-24 20:56:48 +08:00
parent beb3a51d9d
commit 7c56245d3b
12 changed files with 515 additions and 52 deletions

BIN
.vs/TeaCache/v17/.wsuo Normal file

Binary file not shown.

View File

@@ -0,0 +1,139 @@
{
"Version": 1,
"WorkspaceRootPath": "C:\\Users\\25142\\Documents\\TeaCache\\",
"Documents": [
{
"AbsoluteMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|C:\\Users\\25142\\Documents\\TeaCache\\README.md||{EFC0BB08-EA7D-40C6-A696-C870411A895B}",
"RelativeMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|solutionrelative:README.md||{EFC0BB08-EA7D-40C6-A696-C870411A895B}"
},
{
"AbsoluteMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|C:\\Users\\25142\\Documents\\TeaCache\\eval\\teacache\\experiments\\opensora_plan.py||{3B902123-F8A7-4915-9F01-361F908088D0}",
"RelativeMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|solutionrelative:eval\\teacache\\experiments\\opensora_plan.py||{3B902123-F8A7-4915-9F01-361F908088D0}"
},
{
"AbsoluteMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|C:\\Users\\25142\\Documents\\TeaCache\\eval\\teacache\\experiments\\opensora.py||{3B902123-F8A7-4915-9F01-361F908088D0}",
"RelativeMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|solutionrelative:eval\\teacache\\experiments\\opensora.py||{3B902123-F8A7-4915-9F01-361F908088D0}"
},
{
"AbsoluteMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|C:\\Users\\25142\\Documents\\TeaCache\\eval\\teacache\\experiments\\latte.py||{3B902123-F8A7-4915-9F01-361F908088D0}",
"RelativeMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|solutionrelative:eval\\teacache\\experiments\\latte.py||{3B902123-F8A7-4915-9F01-361F908088D0}"
},
{
"AbsoluteMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|C:\\Users\\25142\\Documents\\TeaCache\\eval\\teacache\\experiments\\cogvideox.py||{3B902123-F8A7-4915-9F01-361F908088D0}",
"RelativeMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|solutionrelative:eval\\teacache\\experiments\\cogvideox.py||{3B902123-F8A7-4915-9F01-361F908088D0}"
},
{
"AbsoluteMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|C:\\Users\\25142\\Documents\\TeaCache\\TeaCache4HunyuanVideo\\teacache_sample_video.py||{3B902123-F8A7-4915-9F01-361F908088D0}",
"RelativeMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|solutionrelative:TeaCache4HunyuanVideo\\teacache_sample_video.py||{3B902123-F8A7-4915-9F01-361F908088D0}"
},
{
"AbsoluteMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|C:\\Users\\25142\\Documents\\TeaCache\\TeaCache4HunyuanVideo\\README.md||{EFC0BB08-EA7D-40C6-A696-C870411A895B}",
"RelativeMoniker": "D:0:0:{A2FE74E1-B743-11D0-AE1A-00A0C90FFFC3}|\u003CMiscFiles\u003E|solutionrelative:TeaCache4HunyuanVideo\\README.md||{EFC0BB08-EA7D-40C6-A696-C870411A895B}"
}
],
"DocumentGroupContainers": [
{
"Orientation": 0,
"VerticalTabListWidth": 256,
"DocumentGroups": [
{
"DockedWidth": 200,
"SelectedChildIndex": 5,
"Children": [
{
"$type": "Document",
"DocumentIndex": 1,
"Title": "opensora_plan.py",
"DocumentMoniker": "C:\\Users\\25142\\Documents\\TeaCache\\eval\\teacache\\experiments\\opensora_plan.py",
"RelativeDocumentMoniker": "eval\\teacache\\experiments\\opensora_plan.py",
"ToolTip": "C:\\Users\\25142\\Documents\\TeaCache\\eval\\teacache\\experiments\\opensora_plan.py",
"RelativeToolTip": "eval\\teacache\\experiments\\opensora_plan.py",
"ViewState": "AgIAAAAAAAAAAAAAAAAAADQCAABOAAAAAAAAAA==",
"Icon": "ae27a6b0-e345-4288-96df-5eaf394ee369.001001|",
"WhenOpened": "2024-12-24T12:45:11.23Z",
"EditorCaption": ""
},
{
"$type": "Document",
"DocumentIndex": 2,
"Title": "opensora.py",
"DocumentMoniker": "C:\\Users\\25142\\Documents\\TeaCache\\eval\\teacache\\experiments\\opensora.py",
"RelativeDocumentMoniker": "eval\\teacache\\experiments\\opensora.py",
"ToolTip": "C:\\Users\\25142\\Documents\\TeaCache\\eval\\teacache\\experiments\\opensora.py",
"RelativeToolTip": "eval\\teacache\\experiments\\opensora.py",
"ViewState": "AgIAAAAAAAAAAAAAAAAAANEAAAAAAAAAAAAAAA==",
"Icon": "ae27a6b0-e345-4288-96df-5eaf394ee369.001001|",
"WhenOpened": "2024-12-24T12:44:45.479Z",
"EditorCaption": ""
},
{
"$type": "Document",
"DocumentIndex": 3,
"Title": "latte.py",
"DocumentMoniker": "C:\\Users\\25142\\Documents\\TeaCache\\eval\\teacache\\experiments\\latte.py",
"RelativeDocumentMoniker": "eval\\teacache\\experiments\\latte.py",
"ToolTip": "C:\\Users\\25142\\Documents\\TeaCache\\eval\\teacache\\experiments\\latte.py",
"RelativeToolTip": "eval\\teacache\\experiments\\latte.py",
"ViewState": "AgIAAAAAAAAAAAAAAAAAAPYBAABOAAAAAAAAAA==",
"Icon": "ae27a6b0-e345-4288-96df-5eaf394ee369.001001|",
"WhenOpened": "2024-12-24T12:44:13.292Z",
"EditorCaption": ""
},
{
"$type": "Document",
"DocumentIndex": 5,
"Title": "teacache_sample_video.py",
"DocumentMoniker": "C:\\Users\\25142\\Documents\\TeaCache\\TeaCache4HunyuanVideo\\teacache_sample_video.py",
"RelativeDocumentMoniker": "TeaCache4HunyuanVideo\\teacache_sample_video.py",
"ToolTip": "C:\\Users\\25142\\Documents\\TeaCache\\TeaCache4HunyuanVideo\\teacache_sample_video.py",
"RelativeToolTip": "TeaCache4HunyuanVideo\\teacache_sample_video.py",
"ViewState": "AgIAAMIAAAAAAAAAAAASwNgAAAA4AAAAAAAAAA==",
"Icon": "ae27a6b0-e345-4288-96df-5eaf394ee369.001001|",
"WhenOpened": "2024-12-24T12:43:02.085Z",
"EditorCaption": ""
},
{
"$type": "Document",
"DocumentIndex": 4,
"Title": "cogvideox.py",
"DocumentMoniker": "C:\\Users\\25142\\Documents\\TeaCache\\eval\\teacache\\experiments\\cogvideox.py",
"RelativeDocumentMoniker": "eval\\teacache\\experiments\\cogvideox.py",
"ToolTip": "C:\\Users\\25142\\Documents\\TeaCache\\eval\\teacache\\experiments\\cogvideox.py",
"RelativeToolTip": "eval\\teacache\\experiments\\cogvideox.py",
"ViewState": "AgIAAAkAAAAAAAAAAAAAAM4AAAAeAAAAAAAAAA==",
"Icon": "ae27a6b0-e345-4288-96df-5eaf394ee369.001001|",
"WhenOpened": "2024-12-24T12:42:50.703Z",
"EditorCaption": ""
},
{
"$type": "Document",
"DocumentIndex": 0,
"Title": "README.md",
"DocumentMoniker": "C:\\Users\\25142\\Documents\\TeaCache\\README.md",
"RelativeDocumentMoniker": "README.md",
"ToolTip": "C:\\Users\\25142\\Documents\\TeaCache\\README.md",
"RelativeToolTip": "README.md",
"ViewState": "AgIAACcAAAAAAAAAAAAAADQAAAAAAAAAAAAAAA==",
"Icon": "ae27a6b0-e345-4288-96df-5eaf394ee369.001818|",
"WhenOpened": "2024-12-24T12:36:09.99Z",
"EditorCaption": ""
},
{
"$type": "Document",
"DocumentIndex": 6,
"Title": "README.md",
"DocumentMoniker": "C:\\Users\\25142\\Documents\\TeaCache\\TeaCache4HunyuanVideo\\README.md",
"RelativeDocumentMoniker": "TeaCache4HunyuanVideo\\README.md",
"ToolTip": "C:\\Users\\25142\\Documents\\TeaCache\\TeaCache4HunyuanVideo\\README.md",
"RelativeToolTip": "TeaCache4HunyuanVideo\\README.md",
"ViewState": "AgIAAAAAAAAAAAAAAAAAABAAAAAHAAAAAAAAAA==",
"Icon": "ae27a6b0-e345-4288-96df-5eaf394ee369.001818|",
"WhenOpened": "2024-12-24T12:35:11.041Z",
"EditorCaption": ""
}
]
}
]
}
]
}

11
.vs/VSWorkspaceState.json Normal file

@@ -0,0 +1,11 @@
{
"ExpandedNodes": [
"",
"\\eval",
"\\eval\\teacache",
"\\eval\\teacache\\experiments",
"\\TeaCache4HunyuanVideo"
],
"SelectedNode": "\\eval\\teacache\\experiments\\opensora_plan.py",
"PreviewInSolutionExplorer": false
}

BIN
.vs/slnx.sqlite Normal file

Binary file not shown.

README.md

@@ -54,13 +54,17 @@
![visualization](./assets/tisser.png)
## Latest News 🔥
- [2024/12/24] 🔥 Support [HunyuanVideo](https://github.com/Tencent/HunyuanVideo).
- [2024/12/19] 🔥 Support [CogVideoX](https://github.com/THUDM/CogVideo).
- [2024/12/06] 🎉 Release the [code](https://github.com/LiewFeng/TeaCache) TeaCache. Support [Open-Sora](https://github.com/hpcaitech/Open-Sora), [Open-Sora-Plan](https://github.com/PKU-YuanGroup/Open-Sora-Plan) and [Latte](https://github.com/Vchitect/Latte).
- [2024/12/06] 🎉 Release the [code](https://github.com/LiewFeng/TeaCache) of TeaCache. Support [Open-Sora](https://github.com/hpcaitech/Open-Sora), [Open-Sora-Plan](https://github.com/PKU-YuanGroup/Open-Sora-Plan) and [Latte](https://github.com/Vchitect/Latte).
- [2024/11/28] 🎉 Release the [paper](https://arxiv.org/abs/2411.19108) of TeaCache.
## Introduction
We introduce Timestep Embedding Aware Cache (TeaCache), a training-free caching approach that estimates and leverages the fluctuating differences among model outputs across timesteps. For more details and visual results, please visit our [project page](https://github.com/LiewFeng/TeaCache).
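Concretely, TeaCache tracks how much the timestep-embedding-modulated input changes between consecutive denoising steps: the relative L1 distance is rescaled by an empirically fitted polynomial and accumulated, and the expensive transformer blocks are recomputed only when this accumulator exceeds a threshold; otherwise the cached output residual is reused. The following is a minimal sketch of that decision rule (illustrative only: the helper `should_recompute` and the dict-based state are not part of the released code, which instead patches each model's forward pass; the coefficients shown are the HunyuanVideo ones):
```python
import numpy as np

def should_recompute(modulated_inp, prev_modulated_inp, state, rel_l1_thresh):
    """Decide whether the full transformer forward must run at this timestep.

    `modulated_inp` / `prev_modulated_inp` are torch tensors (the timestep-
    embedding-modulated inputs of the current and previous step); `state` is a
    dict carrying 'accumulated_rel_l1_distance' across steps.
    """
    # Model-specific rescaling polynomial (HunyuanVideo coefficients).
    rescale_func = np.poly1d([7.33226126e+02, -4.01131952e+02, 6.75869174e+01,
                              -3.14987800e+00, 9.61237896e-02])
    # Relative L1 change of the modulated input since the previous step.
    rel_l1 = ((modulated_inp - prev_modulated_inp).abs().mean()
              / prev_modulated_inp.abs().mean()).cpu().item()
    state['accumulated_rel_l1_distance'] += rescale_func(rel_l1)
    if state['accumulated_rel_l1_distance'] < rel_l1_thresh:
        return False  # small accumulated change: reuse the cached residual
    state['accumulated_rel_l1_distance'] = 0
    return True  # large change: run the full forward and refresh the cache
```
The per-model integrations in this repository keep this state (`accumulated_rel_l1_distance`, `previous_modulated_input`, `previous_residual`) as attributes on the transformer and apply the rule inside a patched `forward`.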
## TeaCache for HunyuanVideo
Please refer to [TeaCache4HunyuanVideo](./TeaCache4HunyuanVideo/README.md).
## Installation
Prerequisites:
@@ -135,4 +139,4 @@ If you find TeaCache is useful in your research or applications, please consider
## Acknowledgement
This repository is built based on [VideoSys](https://github.com/NUS-HPC-AI-Lab/VideoSys), [Open-Sora](https://github.com/hpcaitech/Open-Sora), [Open-Sora-Plan](https://github.com/PKU-YuanGroup/Open-Sora-Plan), [Latte](https://github.com/Vchitect/Latte) and [CogVideoX](https://github.com/THUDM/CogVideo). Thanks for their contributions!
This repository is built based on [VideoSys](https://github.com/NUS-HPC-AI-Lab/VideoSys), [Open-Sora](https://github.com/hpcaitech/Open-Sora), [Open-Sora-Plan](https://github.com/PKU-YuanGroup/Open-Sora-Plan), [Latte](https://github.com/Vchitect/Latte), [CogVideoX](https://github.com/THUDM/CogVideo) and [HunyuanVideo](https://github.com/Tencent/HunyuanVideo). Thanks for their contributions!

TeaCache4HunyuanVideo/README.md Normal file

@@ -0,0 +1,55 @@
<!-- ## **TeaCache4HunyuanVideo** -->
# TeaCache4HunyuanVideo
[TeaCache](https://github.com/LiewFeng/TeaCache) can speed up [HunyuanVideo](https://github.com/Tencent/HunyuanVideo) by about 2x with little degradation in visual quality, in a training-free manner.
## 📈 Inference Latency Comparisons on a Single A800 GPU
| Resolution | HunyuanVideo (baseline) | TeaCache (rel_l1_thresh = 0.1) | TeaCache (rel_l1_thresh = 0.15) |
|:---------------------:|:-------------------------:|:--------------------:|:----------------------:|
| 540p | ~18 min | ~11 min | ~8 min |
| 720p | ~50 min | ~30 min | ~23 min |
## Usage
Follow [HunyuanVideo](https://github.com/Tencent/HunyuanVideo) to clone the repository and complete the installation, then copy `teacache_sample_video.py` from this repo into the HunyuanVideo repo.
For single-GPU inference, you can use the following command:
```bash
cd HunyuanVideo
python3 teacache_sample_video.py \
--video-size 720 1280 \
--video-length 129 \
--infer-steps 50 \
--prompt "A cat walks on the grass, realistic style." \
--flow-reverse \
--use-cpu-offload \
--save-path ./teacache_results
```
To generate a video with 8 GPUs, you can use the following command:
```bash
cd HunyuanVideo
torchrun --nproc_per_node=8 teacache_sample_video.py \
--video-size 1280 720 \
--video-length 129 \
--infer-steps 50 \
--prompt "A cat walks on the grass, realistic style." \
--flow-reverse \
--seed 42 \
--ulysses-degree 8 \
--ring-degree 1 \
--save-path ./teacache_results
```
You can modify the threshold (`rel_l1_thresh`) in line 220 to obtain your desired trade-off between latency and visual quality.
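For example, the relevant line looks like the following (the attribute name `rel_l1_thresh` matches `teacache_sample_video.py`, and the speedup figures follow the comment in that script):
```python
# In teacache_sample_video.py, around line 220:
hunyuan_video_sampler.pipeline.transformer.__class__.rel_l1_thresh = 0.1   # ~1.6x speedup, closer to baseline quality
# hunyuan_video_sampler.pipeline.transformer.__class__.rel_l1_thresh = 0.15  # ~2.1x speedup, more aggressive caching
```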
## Acknowledgements
We would like to thank the contributors to [HunyuanVideo](https://github.com/Tencent/HunyuanVideo).

TeaCache4HunyuanVideo/teacache_sample_video.py Normal file

@@ -0,0 +1,254 @@
import os
import time
from pathlib import Path
from loguru import logger
from datetime import datetime
from hyvideo.utils.file_utils import save_videos_grid
from hyvideo.config import parse_args
from hyvideo.inference import HunyuanVideoSampler
from hyvideo.modules.modulate_layers import modulate
from hyvideo.modules.attenion import attention, parallel_attention, get_cu_seqlens
from typing import Any, List, Tuple, Optional, Union, Dict
import torch
import json
import numpy as np
def teacache_forward(
self,
x: torch.Tensor,
t: torch.Tensor, # Should be in range(0, 1000).
text_states: torch.Tensor = None,
text_mask: torch.Tensor = None, # Now we don't use it.
text_states_2: Optional[torch.Tensor] = None, # Text embedding for modulation.
freqs_cos: Optional[torch.Tensor] = None,
freqs_sin: Optional[torch.Tensor] = None,
guidance: torch.Tensor = None, # Guidance for modulation, should be cfg_scale x 1000.
return_dict: bool = True,
) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
out = {}
img = x
txt = text_states
_, _, ot, oh, ow = x.shape
tt, th, tw = (
ot // self.patch_size[0],
oh // self.patch_size[1],
ow // self.patch_size[2],
)
# Prepare modulation vectors.
vec = self.time_in(t)
# text modulation
vec = vec + self.vector_in(text_states_2)
# guidance modulation
if self.guidance_embed:
if guidance is None:
raise ValueError(
"Didn't get guidance strength for guidance distilled model."
)
# our timestep_embedding is merged into guidance_in(TimestepEmbedder)
vec = vec + self.guidance_in(guidance)
# Embed image and text.
img = self.img_in(img)
if self.text_projection == "linear":
txt = self.txt_in(txt)
elif self.text_projection == "single_refiner":
txt = self.txt_in(txt, t, text_mask if self.use_attention_mask else None)
else:
raise NotImplementedError(
f"Unsupported text_projection: {self.text_projection}"
)
txt_seq_len = txt.shape[1]
img_seq_len = img.shape[1]
# Compute cu_seqlens and max_seqlen for flash attention
cu_seqlens_q = get_cu_seqlens(text_mask, img_seq_len)
cu_seqlens_kv = cu_seqlens_q
max_seqlen_q = img_seq_len + txt_seq_len
max_seqlen_kv = max_seqlen_q
freqs_cis = (freqs_cos, freqs_sin) if freqs_cos is not None else None
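# TeaCache: decide whether this step can reuse the cached block residual. The check
# accumulates a polynomially rescaled relative L1 distance of the timestep-modulated
# input since the last full computation and compares it against rel_l1_thresh.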
if self.enable_teacache:
inp = img.clone()
vec_ = vec.clone()
txt_ = txt.clone()
(
img_mod1_shift,
img_mod1_scale,
img_mod1_gate,
img_mod2_shift,
img_mod2_scale,
img_mod2_gate,
) = self.double_blocks[0].img_mod(vec_).chunk(6, dim=-1)
normed_inp = self.double_blocks[0].img_norm1(inp)
modulated_inp = modulate(
normed_inp, shift=img_mod1_shift, scale=img_mod1_scale
)
if self.cnt == 0 or self.cnt == self.num_steps-1:
should_calc = True
self.accumulated_rel_l1_distance = 0
else:
coefficients = [7.33226126e+02, -4.01131952e+02, 6.75869174e+01, -3.14987800e+00, 9.61237896e-02]
rescale_func = np.poly1d(coefficients)
self.accumulated_rel_l1_distance += rescale_func(((modulated_inp-self.previous_modulated_input).abs().mean() / self.previous_modulated_input.abs().mean()).cpu().item())
if self.accumulated_rel_l1_distance < self.rel_l1_thresh:
should_calc = False
else:
should_calc = True
self.accumulated_rel_l1_distance = 0
self.previous_modulated_input = modulated_inp
self.cnt = 0 if self.cnt == self.num_steps-1 else self.cnt + 1
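# Either reuse the cached residual (skipping all double/single stream blocks) or run
# the full forward pass and refresh previous_residual for later steps.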
if self.enable_teacache:
if not should_calc:
img += self.previous_residual
else:
ori_img = img.clone()
# --------------------- Pass through DiT blocks ------------------------
for _, block in enumerate(self.double_blocks):
double_block_args = [
img,
txt,
vec,
cu_seqlens_q,
cu_seqlens_kv,
max_seqlen_q,
max_seqlen_kv,
freqs_cis,
]
img, txt = block(*double_block_args)
# Merge txt and img to pass through single stream blocks.
x = torch.cat((img, txt), 1)
if len(self.single_blocks) > 0:
for _, block in enumerate(self.single_blocks):
single_block_args = [
x,
vec,
txt_seq_len,
cu_seqlens_q,
cu_seqlens_kv,
max_seqlen_q,
max_seqlen_kv,
(freqs_cos, freqs_sin),
]
x = block(*single_block_args)
img = x[:, :img_seq_len, ...]
self.previous_residual = img - ori_img
else:
# --------------------- Pass through DiT blocks ------------------------
for _, block in enumerate(self.double_blocks):
double_block_args = [
img,
txt,
vec,
cu_seqlens_q,
cu_seqlens_kv,
max_seqlen_q,
max_seqlen_kv,
freqs_cis,
]
img, txt = block(*double_block_args)
# Merge txt and img to pass through single stream blocks.
x = torch.cat((img, txt), 1)
if len(self.single_blocks) > 0:
for _, block in enumerate(self.single_blocks):
single_block_args = [
x,
vec,
txt_seq_len,
cu_seqlens_q,
cu_seqlens_kv,
max_seqlen_q,
max_seqlen_kv,
(freqs_cos, freqs_sin),
]
x = block(*single_block_args)
img = x[:, :img_seq_len, ...]
# ---------------------------- Final layer ------------------------------
img = self.final_layer(img, vec) # (N, T, patch_size ** 2 * out_channels)
img = self.unpatchify(img, tt, th, tw)
if return_dict:
out["x"] = img
return out
return img
def main():
args = parse_args()
print(args)
models_root_path = Path(args.model_base)
if not models_root_path.exists():
raise ValueError(f"`models_root` not exists: {models_root_path}")
# Create save folder to save the samples
save_path = args.save_path if args.save_path_suffix=="" else f'{args.save_path}_{args.save_path_suffix}'
if not os.path.exists(save_path):
os.makedirs(save_path, exist_ok=True)
# Load models
hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(models_root_path, args=args)
# Get the updated args
args = hunyuan_video_sampler.args
# TeaCache: attach caching state to the transformer class and swap in the cached forward pass
hunyuan_video_sampler.pipeline.transformer.__class__.enable_teacache = True
hunyuan_video_sampler.pipeline.transformer.__class__.cnt = 0
hunyuan_video_sampler.pipeline.transformer.__class__.num_steps = args.infer_steps - 1
hunyuan_video_sampler.pipeline.transformer.__class__.rel_l1_thresh = 0.15 # 0.1 for 1.6x speedup, 0.15 for 2.1x speedup
hunyuan_video_sampler.pipeline.transformer.__class__.accumulated_rel_l1_distance = 0
hunyuan_video_sampler.pipeline.transformer.__class__.previous_modulated_input = None
hunyuan_video_sampler.pipeline.transformer.__class__.previous_residual = None
hunyuan_video_sampler.pipeline.transformer.__class__.forward = teacache_forward
# Start sampling
# TODO: batch inference check
outputs = hunyuan_video_sampler.predict(
prompt=args.prompt,
height=args.video_size[0],
width=args.video_size[1],
video_length=args.video_length,
seed=args.seed,
negative_prompt=args.neg_prompt,
infer_steps=args.infer_steps,
guidance_scale=args.cfg_scale,
num_videos_per_prompt=args.num_videos,
flow_shift=args.flow_shift,
batch_size=args.batch_size,
embedded_guidance_scale=args.embedded_cfg_scale
)
samples = outputs['samples']
# Save samples
if 'LOCAL_RANK' not in os.environ or int(os.environ['LOCAL_RANK']) == 0:
for i, sample in enumerate(samples):
sample = samples[i].unsqueeze(0)
time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
save_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][i]}_{outputs['prompts'][i][:100].replace('/','')}.mp4"
save_videos_grid(sample, save_path, fps=24)
logger.info(f'Sample save to: {save_path}')
if __name__ == "__main__":
main()

eval/teacache/experiments/cogvideox.py

@@ -194,25 +194,25 @@ def teacache_forward(
def eval_teacache_slow(prompt_list):
config = CogVideoXConfig()
engine = VideoSysEngine(config)
engine.driver_worker.transformer.enable_teacache = True
engine.driver_worker.transformer.rel_l1_thresh = 0.1
engine.driver_worker.transformer.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.previous_modulated_input = None
engine.driver_worker.transformer.previous_residual = None
engine.driver_worker.transformer.previous_residual_encoder = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
engine.driver_worker.transformer.__class__.enable_teacache = True
engine.driver_worker.transformer.__class__.rel_l1_thresh = 0.1
engine.driver_worker.transformer.__class__.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.__class__.previous_modulated_input = None
engine.driver_worker.transformer.__class__.previous_residual = None
engine.driver_worker.transformer.__class__.previous_residual_encoder = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
generate_func(engine, prompt_list, "./samples/cogvideox_teacache_slow", loop=5)
def eval_teacache_fast(prompt_list):
config = CogVideoXConfig()
engine = VideoSysEngine(config)
engine.driver_worker.transformer.enable_teacache = True
engine.driver_worker.transformer.rel_l1_thresh = 0.2
engine.driver_worker.transformer.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.previous_modulated_input = None
engine.driver_worker.transformer.previous_residual = None
engine.driver_worker.transformer.previous_residual_encoder = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
engine.driver_worker.transformer.__class__.enable_teacache = True
engine.driver_worker.transformer.__class__.rel_l1_thresh = 0.2
engine.driver_worker.transformer.__class__.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.__class__.previous_modulated_input = None
engine.driver_worker.transformer.__class__.previous_residual = None
engine.driver_worker.transformer.__class__.previous_residual_encoder = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
generate_func(engine, prompt_list, "./samples/cogvideox_teacache_fast", loop=5)

eval/teacache/experiments/latte.py

@@ -497,23 +497,23 @@ def teacache_forward(
def eval_teacache_slow(prompt_list):
config = LatteConfig()
engine = VideoSysEngine(config)
engine.driver_worker.transformer.enable_teacache = True
engine.driver_worker.transformer.rel_l1_thresh = 0.1
engine.driver_worker.transformer.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.previous_modulated_input = None
engine.driver_worker.transformer.previous_residual = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
engine.driver_worker.transformer.__class__.enable_teacache = True
engine.driver_worker.transformer.__class__.rel_l1_thresh = 0.1
engine.driver_worker.transformer.__class__.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.__class__.previous_modulated_input = None
engine.driver_worker.transformer.__class__.previous_residual = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
generate_func(engine, prompt_list, "./samples/latte_teacache_slow", loop=5)
def eval_teacache_fast(prompt_list):
config = LatteConfig()
engine = VideoSysEngine(config)
engine.driver_worker.transformer.enable_teacache = True
engine.driver_worker.transformer.rel_l1_thresh = 0.2
engine.driver_worker.transformer.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.previous_modulated_input = None
engine.driver_worker.transformer.previous_residual = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
engine.driver_worker.transformer.__class__.enable_teacache = True
engine.driver_worker.transformer.__class__.rel_l1_thresh = 0.2
engine.driver_worker.transformer.__class__.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.__class__.previous_modulated_input = None
engine.driver_worker.transformer.__class__.previous_residual = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
generate_func(engine, prompt_list, "./samples/latte_teacache_fast", loop=5)

eval/teacache/experiments/opensora.py

@@ -211,23 +211,23 @@ def eval_base(prompt_list):
def eval_teacache_slow(prompt_list):
config = OpenSoraConfig()
engine = VideoSysEngine(config)
engine.driver_worker.transformer.enable_teacache = True
engine.driver_worker.transformer.rel_l1_thresh = 0.1
engine.driver_worker.transformer.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.previous_modulated_input = None
engine.driver_worker.transformer.previous_residual = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
engine.driver_worker.transformer.__class__.enable_teacache = True
engine.driver_worker.transformer.__class__.rel_l1_thresh = 0.1
engine.driver_worker.transformer.__class__.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.__class__.previous_modulated_input = None
engine.driver_worker.transformer.__class__.previous_residual = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
generate_func(engine, prompt_list, "./samples/opensora_teacache_slow", loop=5)
def eval_teacache_fast(prompt_list):
config = OpenSoraConfig()
engine = VideoSysEngine(config)
engine.driver_worker.transformer.enable_teacache = True
engine.driver_worker.transformer.rel_l1_thresh = 0.2
engine.driver_worker.transformer.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.previous_modulated_input = None
engine.driver_worker.transformer.previous_residual = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
engine.driver_worker.transformer.__class__.enable_teacache = True
engine.driver_worker.transformer.__class__.rel_l1_thresh = 0.2
engine.driver_worker.transformer.__class__.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.__class__.previous_modulated_input = None
engine.driver_worker.transformer.__class__.previous_residual = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
generate_func(engine, prompt_list, "./samples/opensora_teacache_fast", loop=5)

eval/teacache/experiments/opensora_plan.py

@@ -560,23 +560,23 @@ def teacache_forward(
def eval_teacache_slow(prompt_list):
config = OpenSoraPlanConfig(version="v110", transformer_type="65x512x512")
engine = VideoSysEngine(config)
engine.driver_worker.transformer.enable_teacache = True
engine.driver_worker.transformer.rel_l1_thresh = 0.1
engine.driver_worker.transformer.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.previous_modulated_input = None
engine.driver_worker.transformer.previous_residual = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
engine.driver_worker.transformer.__class__.enable_teacache = True
engine.driver_worker.transformer.__class__.rel_l1_thresh = 0.1
engine.driver_worker.transformer.__class__.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.__class__.previous_modulated_input = None
engine.driver_worker.transformer.__class__.previous_residual = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
generate_func(engine, prompt_list, "./samples/opensoraplan_teacache_slow", loop=5)
def eval_teacache_fast(prompt_list):
config = OpenSoraPlanConfig(version="v110", transformer_type="65x512x512")
engine = VideoSysEngine(config)
engine.driver_worker.transformer.enable_teacache = True
engine.driver_worker.transformer.rel_l1_thresh = 0.2
engine.driver_worker.transformer.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.previous_modulated_input = None
engine.driver_worker.transformer.previous_residual = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
engine.driver_worker.transformer.__class__.enable_teacache = True
engine.driver_worker.transformer.__class__.rel_l1_thresh = 0.2
engine.driver_worker.transformer.__class__.accumulated_rel_l1_distance = 0
engine.driver_worker.transformer.__class__.previous_modulated_input = None
engine.driver_worker.transformer.__class__.previous_residual = None
engine.driver_worker.transformer.__class__.forward = teacache_forward
generate_func(engine, prompt_list, "./samples/opensoraplan_teacache_fast", loop=5)