From 67a4e7a650385c90ab62abef84d73957731ce9ac Mon Sep 17 00:00:00 2001 From: kijai <40791699+kijai@users.noreply.github.com> Date: Tue, 4 Feb 2025 18:05:07 +0200 Subject: [PATCH] Allow larger inputs with fast model Not sure if useful, but at least it doesn't error out, unlike the normal model, which did with inputs larger than 530x530 --- hy3dgen/shapegen/models/conditioner.py | 3 ++- hy3dgen/shapegen/pipelines.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hy3dgen/shapegen/models/conditioner.py b/hy3dgen/shapegen/models/conditioner.py index 0e6427f..431336a 100755 --- a/hy3dgen/shapegen/models/conditioner.py +++ b/hy3dgen/shapegen/models/conditioner.py @@ -43,6 +43,7 @@ class ImageEncoder(nn.Module): **kwargs, ): super().__init__() + self.has_guidance_embed = kwargs.get('has_guidance_embed', False) if config is None: self.model = self.MODEL_CLASS.from_pretrained(version) @@ -78,7 +79,7 @@ class ImageEncoder(nn.Module): mask = mask.to(image) image = image * mask supported_sizes = [518, 530] - if image.shape[2] not in supported_sizes or image.shape[3] not in supported_sizes: + if (image.shape[2] not in supported_sizes or image.shape[3] not in supported_sizes) and not self.has_guidance_embed: print(f'Image shape {image.shape} not supported. Resizing to 518x518') inputs = self.transform(image) else: diff --git a/hy3dgen/shapegen/pipelines.py b/hy3dgen/shapegen/pipelines.py index 6af6475..757e655 100755 --- a/hy3dgen/shapegen/pipelines.py +++ b/hy3dgen/shapegen/pipelines.py @@ -187,6 +187,7 @@ class Hunyuan3DDiTPipeline: if "guidance_in.in_layer.bias" in ckpt['model']: #guidance_in.in_layer.bias logger.info("Model has guidance_in, setting guidance_embed to True") config['model']['params']['guidance_embed'] = True + config['conditioner']['params']['main_image_encoder']['kwargs']['has_guidance_embed'] = True config['model']['params']['attention_mode'] = attention_mode config['vae']['params']['attention_mode'] = attention_mode