mirror of https://git.datalinker.icu/kijai/ComfyUI-CogVideoXWrapper.git

fixes

commit ba2dbfbeb4 (parent dac6a2a3ac)
@@ -258,9 +258,9 @@ class DownloadAndLoadCogVideoModel:

         #fp8
         if fp8_transformer == "enabled" or fp8_transformer == "fastmode":
-            params_to_keep = {"patch_embed", "lora", "pos_embedding", "time_embedding"}
+            params_to_keep = {"patch_embed", "lora", "pos_embedding", "time_embedding", "norm_k", "norm_q", "to_k.bias", "to_q.bias", "to_v.bias"}
             if "1.5" in model:
-                params_to_keep.update({"norm1.linear.weight", "norm_k", "norm_q", "ofs_embedding", "norm_final", "norm_out", "proj_out"})
+                params_to_keep.update({"norm1.linear.weight", "ofs_embedding", "norm_final", "norm_out", "proj_out"})
             for name, param in pipe.transformer.named_parameters():
                 if not any(keyword in name for keyword in params_to_keep):
                     param.data = param.data.to(torch.float8_e4m3fn)
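The change keeps the attention norms and q/k/v biases in full precision for every model version, not just 1.5. For context, a minimal standalone sketch of this selective fp8 cast (the keep-list loop is verbatim from the diff; the helper name, toy module, and keep-set are illustrative assumptions, not the wrapper's actual model):

import torch
import torch.nn as nn

def cast_to_fp8_except(model: nn.Module, params_to_keep: set) -> None:
    # Cast parameter storage to fp8, skipping any parameter whose name
    # contains a keep-list keyword (embeddings, norms, biases stay put).
    for name, param in model.named_parameters():
        if not any(keyword in name for keyword in params_to_keep):
            param.data = param.data.to(torch.float8_e4m3fn)

# Toy usage: keep the LayerNorm parameters ("1.weight"/"1.bias") in full precision.
model = nn.Sequential(nn.Linear(8, 8), nn.LayerNorm(8))
cast_to_fp8_except(model, {"1.weight", "1.bias"})
print({n: p.dtype for n, p in model.named_parameters()})
# {'0.weight': torch.float8_e4m3fn, '0.bias': torch.float8_e4m3fn,
#  '1.weight': torch.float32, '1.bias': torch.float32}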
nodes.py (16 changed lines)
@@ -854,14 +854,14 @@ class CogVideoSampler:
             num_frames == 49 or
             context_options is not None
         ), "1.0 I2V model can only do 49 frames"
-        if image_cond_latents is not None:
-            assert supports_image_conds, "Image condition latents only supported for I2V and Interpolation models"
-            if "I2V" in model_name:
-                assert image_cond_latents["samples"].shape[1] == 1, "I2V model only supports single image condition latent"
-            elif "interpolation" in model_name.lower():
-                assert image_cond_latents["samples"].shape[1] == 2, "Interpolation model needs two image condition latents"
-            else:
-                assert not supports_image_conds, "Image condition latents required for I2V models"
+        # if image_cond_latents is not None:
+        #     assert supports_image_conds, "Image condition latents only supported for I2V and Interpolation models"
+        #     if "I2V" in model_name:
+        #         assert image_cond_latents["samples"].shape[1] == 1, "I2V model only supports single image condition latent"
+        #     elif "interpolation" in model_name.lower():
+        #         assert image_cond_latents["samples"].shape[1] == 2, "Interpolation model needs two image condition latents"
+        #     else:
+        #         assert not supports_image_conds, "Image condition latents required for I2V models"

         device = mm.get_torch_device()
         offload_device = mm.unet_offload_device()
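For reference, a compact sketch of what the now-disabled checks enforced, assuming image_cond_latents is a dict whose "samples" tensor is laid out (batch, frames, channels, height, width); the helper name is hypothetical:

import torch

def validate_image_conds(image_cond_latents, model_name: str) -> None:
    # Mirrors the commented-out asserts: I2V conditions on one frame,
    # interpolation on two (first and last).
    if image_cond_latents is None:
        return
    num_cond_frames = image_cond_latents["samples"].shape[1]
    if "I2V" in model_name:
        assert num_cond_frames == 1, "I2V model only supports single image condition latent"
    elif "interpolation" in model_name.lower():
        assert num_cond_frames == 2, "Interpolation model needs two image condition latents"

# Passes: two conditioning frames for an interpolation checkpoint.
validate_image_conds({"samples": torch.zeros(1, 2, 16, 60, 90)}, "CogVideoX interpolation")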
@@ -519,10 +519,9 @@ class CogVideoXPipeline(VideoSysPipeline, CogVideoXLoraLoaderMixin):
                 )
                 latent_padding = torch.zeros(padding_shape, device=device, dtype=self.vae.dtype)
                 image_cond_latents = torch.cat([image_cond_latents[:, 0, :, :, :].unsqueeze(1), latent_padding, image_cond_latents[:, -1, :, :, :].unsqueeze(1)], dim=1)
-                # Select the first frame along the second dimension
                 if self.transformer.config.patch_size_t is not None:
-                    first_frame = image_cond_latents[:, : image_latents.size(1) % self.transformer.config.patch_size_t, ...]
-                    image_cond_latents = torch.cat([first_frame, image_latents], dim=1)
+                    first_frame = image_cond_latents[:, : image_cond_latents.size(1) % self.transformer.config.patch_size_t, ...]
+                    image_cond_latents = torch.cat([first_frame, image_cond_latents], dim=1)

                 logger.info(f"image cond latents shape: {image_cond_latents.shape}")
             else:
@@ -537,6 +536,10 @@ class CogVideoXPipeline(VideoSysPipeline, CogVideoXLoraLoaderMixin):
                 )
                 latent_padding = torch.zeros(padding_shape, device=device, dtype=self.vae.dtype)
                 image_cond_latents = torch.cat([image_cond_latents, latent_padding], dim=1)
+                # Select the first frame along the second dimension
+                if self.transformer.config.patch_size_t is not None:
+                    first_frame = image_cond_latents[:, : image_cond_latents.size(1) % self.transformer.config.patch_size_t, ...]
+                    image_cond_latents = torch.cat([first_frame, image_cond_latents], dim=1)
             else:
                 image_cond_latents = image_cond_latents.repeat(1, latents.shape[1], 1, 1, 1)
             # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
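Both pipeline hunks apply the same pattern: besides fixing the image_latents/image_cond_latents mix-up, they prepend the first (num_frames % patch_size_t) latent frames so the temporal length lines up with the transformer's temporal patch size. A standalone sketch of that arithmetic, with tensor dimensions invented for illustration:

import torch

def pad_to_temporal_patch(image_cond_latents: torch.Tensor, patch_size_t: int) -> torch.Tensor:
    # Prepend the first (num_frames % patch_size_t) frames; with
    # patch_size_t == 2 this rounds an odd frame count up to an even one.
    first_frame = image_cond_latents[:, : image_cond_latents.size(1) % patch_size_t, ...]
    return torch.cat([first_frame, image_cond_latents], dim=1)

# 13 latent frames, patch_size_t = 2 -> one frame prepended -> 14 frames.
latents = torch.zeros(1, 13, 16, 60, 90)
print(pad_to_temporal_patch(latents, 2).shape)  # torch.Size([1, 14, 16, 60, 90])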