diff --git a/model_loading.py b/model_loading.py
index 08ddcee..959c8ff 100644
--- a/model_loading.py
+++ b/model_loading.py
@@ -258,9 +258,9 @@ class DownloadAndLoadCogVideoModel:
 
         #fp8
         if fp8_transformer == "enabled" or fp8_transformer == "fastmode":
-            params_to_keep = {"patch_embed", "lora", "pos_embedding", "time_embedding"}
+            params_to_keep = {"patch_embed", "lora", "pos_embedding", "time_embedding", "norm_k", "norm_q", "to_k.bias", "to_q.bias", "to_v.bias"}
             if "1.5" in model:
-                params_to_keep.update({"norm1.linear.weight", "norm_k", "norm_q","ofs_embedding", "norm_final", "norm_out", "proj_out"})
+                params_to_keep.update({"norm1.linear.weight", "ofs_embedding", "norm_final", "norm_out", "proj_out"})
             for name, param in pipe.transformer.named_parameters():
                 if not any(keyword in name for keyword in params_to_keep):
                     param.data = param.data.to(torch.float8_e4m3fn)
diff --git a/nodes.py b/nodes.py
index 29ffe2c..aa8f6bf 100644
--- a/nodes.py
+++ b/nodes.py
@@ -854,14 +854,14 @@ class CogVideoSampler:
                 num_frames == 49 or context_options is not None
             ), "1.0 I2V model can only do 49 frames"
 
-        if image_cond_latents is not None:
-            assert supports_image_conds, "Image condition latents only supported for I2V and Interpolation models"
-            if "I2V" in model_name:
-                assert image_cond_latents["samples"].shape[1] == 1, "I2V model only supports single image condition latent"
-            elif "interpolation" in model_name.lower():
-                assert image_cond_latents["samples"].shape[1] == 2, "Interpolation model needs two image condition latents"
-        else:
-            assert not supports_image_conds, "Image condition latents required for I2V models"
+        # if image_cond_latents is not None:
+        #     assert supports_image_conds, "Image condition latents only supported for I2V and Interpolation models"
+        #     if "I2V" in model_name:
+        #         assert image_cond_latents["samples"].shape[1] == 1, "I2V model only supports single image condition latent"
+        #     elif "interpolation" in model_name.lower():
+        #         assert image_cond_latents["samples"].shape[1] == 2, "Interpolation model needs two image condition latents"
+        # else:
+        #     assert not supports_image_conds, "Image condition latents required for I2V models"
 
         device = mm.get_torch_device()
         offload_device = mm.unet_offload_device()
diff --git a/pipeline_cogvideox.py b/pipeline_cogvideox.py
index 694a85e..13c960e 100644
--- a/pipeline_cogvideox.py
+++ b/pipeline_cogvideox.py
@@ -519,10 +519,9 @@ class CogVideoXPipeline(VideoSysPipeline, CogVideoXLoraLoaderMixin):
                 )
                 latent_padding = torch.zeros(padding_shape, device=device, dtype=self.vae.dtype)
                 image_cond_latents = torch.cat([image_cond_latents[:, 0, :, :, :].unsqueeze(1), latent_padding, image_cond_latents[:, -1, :, :, :].unsqueeze(1)], dim=1)
-                # Select the first frame along the second dimension
                 if self.transformer.config.patch_size_t is not None:
-                    first_frame = image_cond_latents[:, : image_latents.size(1) % self.transformer.config.patch_size_t, ...]
-                    image_cond_latents = torch.cat([first_frame, image_latents], dim=1)
+                    first_frame = image_cond_latents[:, : image_cond_latents.size(1) % self.transformer.config.patch_size_t, ...]
+                    image_cond_latents = torch.cat([first_frame, image_cond_latents], dim=1)
 
                 logger.info(f"image cond latents shape: {image_cond_latents.shape}")
             else:
@@ -537,6 +536,10 @@ class CogVideoXPipeline(VideoSysPipeline, CogVideoXLoraLoaderMixin):
                 )
                 latent_padding = torch.zeros(padding_shape, device=device, dtype=self.vae.dtype)
                 image_cond_latents = torch.cat([image_cond_latents, latent_padding], dim=1)
+                # Select the first frame along the second dimension
+                if self.transformer.config.patch_size_t is not None:
+                    first_frame = image_cond_latents[:, : image_cond_latents.size(1) % self.transformer.config.patch_size_t, ...]
+                    image_cond_latents = torch.cat([first_frame, image_cond_latents], dim=1)
             else:
                 image_cond_latents = image_cond_latents.repeat(1, latents.shape[1], 1, 1, 1)
        # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
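
Note on the model_loading.py hunk: the keep-list now shields norm scales and the attention q/k/v biases from the fp8 downcast for every model variant, not just 1.5. A minimal, self-contained sketch of this selective-cast pattern follows; the toy module and keep-list keywords are illustrative, not the repo's actual transformer.

    import torch
    import torch.nn as nn

    def cast_to_fp8_except(model: nn.Module, params_to_keep: set) -> None:
        # Downcast every parameter to float8_e4m3fn unless its name contains
        # one of the keep-list keywords (substring match, as in the diff).
        for name, param in model.named_parameters():
            if not any(keyword in name for keyword in params_to_keep):
                param.data = param.data.to(torch.float8_e4m3fn)

    # Toy usage: keep the LayerNorm parameters ("1.weight"/"1.bias") in fp32.
    toy = nn.Sequential(nn.Linear(4, 4), nn.LayerNorm(4))
    cast_to_fp8_except(toy, {"1.weight", "1.bias"})
    print({n: p.dtype for n, p in toy.named_parameters()})
    # {'0.weight': torch.float8_e4m3fn, '0.bias': torch.float8_e4m3fn,
    #  '1.weight': torch.float32, '1.bias': torch.float32}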
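Note on the pipeline_cogvideox.py hunks: the first hunk fixes the indexing to use the already-padded image_cond_latents instead of the stale image_latents name, and the second hunk applies the same temporal alignment step to the non-interpolation branch. A standalone sketch of that alignment follows, under the assumption of patch_size_t == 2 and an odd latent frame count (as in CogVideoX 1.5); tensor shapes are illustrative.

    import torch

    def pad_cond_to_patch_size_t(image_cond_latents: torch.Tensor, patch_size_t: int) -> torch.Tensor:
        # image_cond_latents: [batch, frames, channels, height, width].
        # Repeat the first (frames % patch_size_t) frames at the front; with
        # patch_size_t == 2 and an odd frame count this rounds 13 -> 14, so
        # the temporal length divides evenly into time patches.
        first_frame = image_cond_latents[:, : image_cond_latents.size(1) % patch_size_t, ...]
        return torch.cat([first_frame, image_cond_latents], dim=1)

    cond = torch.zeros(1, 13, 16, 60, 90)           # 13 latent frames, hypothetical shape
    print(pad_cond_to_patch_size_t(cond, 2).shape)  # torch.Size([1, 14, 16, 60, 90])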