diff --git a/hy3dgen/shapegen/models/conditioner.py b/hy3dgen/shapegen/models/conditioner.py index 0e6427f..431336a 100755 --- a/hy3dgen/shapegen/models/conditioner.py +++ b/hy3dgen/shapegen/models/conditioner.py @@ -43,6 +43,7 @@ class ImageEncoder(nn.Module): **kwargs, ): super().__init__() + self.has_guidance_embed = kwargs.get('has_guidance_embed', False) if config is None: self.model = self.MODEL_CLASS.from_pretrained(version) @@ -78,7 +79,7 @@ class ImageEncoder(nn.Module): mask = mask.to(image) image = image * mask supported_sizes = [518, 530] - if image.shape[2] not in supported_sizes or image.shape[3] not in supported_sizes: + if (image.shape[2] not in supported_sizes or image.shape[3] not in supported_sizes) and not self.has_guidance_embed: print(f'Image shape {image.shape} not supported. Resizing to 518x518') inputs = self.transform(image) else: diff --git a/hy3dgen/shapegen/pipelines.py b/hy3dgen/shapegen/pipelines.py index 6af6475..757e655 100755 --- a/hy3dgen/shapegen/pipelines.py +++ b/hy3dgen/shapegen/pipelines.py @@ -187,6 +187,7 @@ class Hunyuan3DDiTPipeline: if "guidance_in.in_layer.bias" in ckpt['model']: #guidance_in.in_layer.bias logger.info("Model has guidance_in, setting guidance_embed to True") config['model']['params']['guidance_embed'] = True + config['conditioner']['params']['main_image_encoder']['kwargs']['has_guidance_embed'] = True config['model']['params']['attention_mode'] = attention_mode config['vae']['params']['attention_mode'] = attention_mode