This commit is contained in:
kijai 2024-11-11 01:19:11 +02:00
parent 184097e78e
commit ca63f5dade
3 changed files with 7 additions and 3 deletions

View File

@@ -571,11 +571,10 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
# 2. Patch embedding
p = self.config.patch_size
p_t = self.config.patch_size_t
# We know that the hidden states height and width will always be divisible by patch_size.
# But, the number of frames may not be divisible by patch_size_t. So, we pad with the beginning frames.
if p_t is not None:
remaining_frames = p_t - num_frames % p_t
remaining_frames = 0 if num_frames % 2 == 0 else 1
first_frame = hidden_states[:, :1].repeat(1, 1 + remaining_frames, 1, 1, 1)
hidden_states = torch.cat([first_frame, hidden_states[:, 1:]], dim=1)

View File

@@ -263,7 +263,8 @@ class DownloadAndLoadCogVideoModel:
pipe.set_adapters(adapter_list, adapter_weights=adapter_weights)
if fuse:
lora_scale = 1
if "dimensionx" in lora[-1]["path"].lower():
dimension_loras = ["orbit_left_lora", "dimensionx"] # for now dimensionx loras need scaling
if any(item in lora[-1]["path"].lower() for item in dimension_loras):
lora_scale = lora_scale / lora_rank
pipe.fuse_lora(lora_scale=lora_scale, components=["transformer"])

View File

@@ -828,6 +828,10 @@ class CogVideoSampler:
num_frames == 49 or
context_options is not None
), "1.0 I2V model can only do 49 frames"
if image_cond_latents is not None:
assert "I2V" in pipeline.get("model_name", ""), "Image condition latents only supported for I2V models"
else:
assert "I2V" not in pipeline.get("model_name", ""), "Image condition latents required for I2V models"
device = mm.get_torch_device()
offload_device = mm.unet_offload_device()