mirror of
https://git.datalinker.icu/kijai/ComfyUI-CogVideoXWrapper.git
synced 2025-12-24 12:24:27 +08:00
fix
This commit is contained in:
parent
b5eefbf4d4
commit
0758d2d016
@ -473,7 +473,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
|
|||||||
if image_cond_latents is not None:
|
if image_cond_latents is not None:
|
||||||
image_cond_frame_count = image_cond_latents.size(1)
|
image_cond_frame_count = image_cond_latents.size(1)
|
||||||
patch_size_t = self.transformer.config.patch_size_t
|
patch_size_t = self.transformer.config.patch_size_t
|
||||||
if image_cond_latents.shape[1] == 2:
|
if image_cond_frame_count == 2:
|
||||||
logger.info("More than one image conditioning frame received, interpolating")
|
logger.info("More than one image conditioning frame received, interpolating")
|
||||||
padding_shape = (
|
padding_shape = (
|
||||||
batch_size,
|
batch_size,
|
||||||
@ -485,11 +485,11 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
|
|||||||
latent_padding = torch.zeros(padding_shape, device=device, dtype=self.vae_dtype)
|
latent_padding = torch.zeros(padding_shape, device=device, dtype=self.vae_dtype)
|
||||||
image_cond_latents = torch.cat([image_cond_latents[:, 0, :, :, :].unsqueeze(1), latent_padding, image_cond_latents[:, -1, :, :, :].unsqueeze(1)], dim=1)
|
image_cond_latents = torch.cat([image_cond_latents[:, 0, :, :, :].unsqueeze(1), latent_padding, image_cond_latents[:, -1, :, :, :].unsqueeze(1)], dim=1)
|
||||||
if patch_size_t:
|
if patch_size_t:
|
||||||
first_frame = image_cond_latents[:, : image_cond_frame_count % patch_size_t, ...]
|
first_frame = image_cond_latents[:, : image_cond_latents.size(1) % patch_size_t, ...]
|
||||||
image_cond_latents = torch.cat([first_frame, image_cond_latents], dim=1)
|
image_cond_latents = torch.cat([first_frame, image_cond_latents], dim=1)
|
||||||
|
|
||||||
logger.info(f"image cond latents shape: {image_cond_latents.shape}")
|
logger.info(f"image cond latents shape: {image_cond_latents.shape}")
|
||||||
elif image_cond_latents.shape[1] == 1:
|
elif image_cond_frame_count == 1:
|
||||||
logger.info("Only one image conditioning frame received, img2vid")
|
logger.info("Only one image conditioning frame received, img2vid")
|
||||||
if self.input_with_padding:
|
if self.input_with_padding:
|
||||||
padding_shape = (
|
padding_shape = (
|
||||||
@ -503,7 +503,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
|
|||||||
image_cond_latents = torch.cat([image_cond_latents, latent_padding], dim=1)
|
image_cond_latents = torch.cat([image_cond_latents, latent_padding], dim=1)
|
||||||
# Select the first frame along the second dimension
|
# Select the first frame along the second dimension
|
||||||
if patch_size_t:
|
if patch_size_t:
|
||||||
first_frame = image_cond_latents[:, : image_cond_frame_count % patch_size_t, ...]
|
first_frame = image_cond_latents[:, : image_cond_latents.size(1) % patch_size_t, ...]
|
||||||
image_cond_latents = torch.cat([first_frame, image_cond_latents], dim=1)
|
image_cond_latents = torch.cat([first_frame, image_cond_latents], dim=1)
|
||||||
else:
|
else:
|
||||||
image_cond_latents = image_cond_latents.repeat(1, latents.shape[1], 1, 1, 1)
|
image_cond_latents = image_cond_latents.repeat(1, latents.shape[1], 1, 1, 1)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user