update

commit ca63f5dade
parent 184097e78e
@@ -571,11 +571,10 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
         # 2. Patch embedding
         p = self.config.patch_size
         p_t = self.config.patch_size_t

         # We know that the hidden states height and width will always be divisible by patch_size.
         # But, the number of frames may not be divisible by patch_size_t. So, we pad with the beginning frames.
         if p_t is not None:
-            remaining_frames = p_t - num_frames % p_t
+            remaining_frames = 0 if num_frames % 2 == 0 else 1
             first_frame = hidden_states[:, :1].repeat(1, 1 + remaining_frames, 1, 1, 1)
             hidden_states = torch.cat([first_frame, hidden_states[:, 1:]], dim=1)
-
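Note on this hunk: the old formula pads a full extra temporal patch (p_t frames) whenever num_frames is already divisible by p_t, while the new one hardcodes a temporal patch size of 2 and pads a single frame only for odd counts. A minimal standalone sketch of the two computations, assuming p_t = 2 (the value this branch appears to target); everything below is illustrative and not part of the commit:

# Sketch only: compares the old and new padding-count formulas, assuming p_t = 2.
def remaining_frames_old(num_frames, p_t=2):
    # Old formula: yields p_t (a full extra patch) even when num_frames
    # is already divisible by p_t.
    return p_t - num_frames % p_t

def remaining_frames_new(num_frames):
    # New formula: pads a single frame only when the frame count is odd.
    return 0 if num_frames % 2 == 0 else 1

for n in (48, 49):
    print(n, remaining_frames_old(n), remaining_frames_new(n))
# 48 -> old pads 2 frames, new pads 0; 49 -> both pad 1 frame.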
@@ -263,7 +263,8 @@ class DownloadAndLoadCogVideoModel:
             pipe.set_adapters(adapter_list, adapter_weights=adapter_weights)
             if fuse:
                 lora_scale = 1
-                if "dimensionx" in lora[-1]["path"].lower():
+                dimension_loras = ["orbit_left_lora", "dimensionx"] # for now dimensionx loras need scaling
+                if any(item in lora[-1]["path"].lower() for item in dimension_loras):
                     lora_scale = lora_scale / lora_rank
                 pipe.fuse_lora(lora_scale=lora_scale, components=["transformer"])
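Note on this hunk: the single "dimensionx" substring test becomes a membership check over a small list of LoRA name fragments that need rank-based down-scaling. A minimal sketch of that check with hypothetical file paths (the paths and the helper name are illustrative, not from the repository):

# Sketch only: the list-based substring check; example paths are made up.
dimension_loras = ["orbit_left_lora", "dimensionx"]

def needs_rank_scaling(path):
    # True if any known DimensionX-style LoRA name appears in the path.
    return any(item in path.lower() for item in dimension_loras)

print(needs_rank_scaling("loras/DimensionX_rotate.safetensors"))   # True
print(needs_rank_scaling("loras/orbit_left_lora_v2.safetensors"))  # True
print(needs_rank_scaling("loras/style_anime.safetensors"))         # False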
 nodes.py | 4 ++++
@@ -828,6 +828,10 @@ class CogVideoSampler:
             num_frames == 49 or
             context_options is not None
         ), "1.0 I2V model can only do 49 frames"
+        if image_cond_latents is not None:
+            assert "I2V" in pipeline.get("model_name", ""), "Image condition latents only supported for I2V models"
+        else:
+            assert "I2V" not in pipeline.get("model_name", ""), "Image condition latents required for I2V models"

         device = mm.get_torch_device()
         offload_device = mm.unet_offload_device()
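Note on this hunk: the added asserts make image_cond_latents and the model type mutually consistent, i.e., I2V checkpoints must receive image condition latents and other checkpoints must not. A minimal sketch of the guard using a stand-in pipeline dict (the helper name and model names are illustrative):

# Sketch only: the mutual-consistency guard, extracted as a helper.
def check_i2v_inputs(pipeline, image_cond_latents):
    if image_cond_latents is not None:
        assert "I2V" in pipeline.get("model_name", ""), \
            "Image condition latents only supported for I2V models"
    else:
        assert "I2V" not in pipeline.get("model_name", ""), \
            "Image condition latents required for I2V models"

check_i2v_inputs({"model_name": "CogVideoX-5b-I2V"}, image_cond_latents=object())  # passes
check_i2v_inputs({"model_name": "CogVideoX-5b"}, image_cond_latents=None)          # passes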