Allow larger inputs with fast model

Not sure if this is useful, but at least it no longer errors out on inputs larger than 530x530, the way the normal model did.
kijai 2025-02-04 18:05:07 +02:00
parent 315dd9aa7a
commit 67a4e7a650
2 changed files with 3 additions and 1 deletion


@@ -43,6 +43,7 @@ class ImageEncoder(nn.Module):
         **kwargs,
     ):
         super().__init__()
+        self.has_guidance_embed = kwargs.get('has_guidance_embed', False)
         if config is None:
             self.model = self.MODEL_CLASS.from_pretrained(version)
@@ -78,7 +79,7 @@ class ImageEncoder(nn.Module):
             mask = mask.to(image)
             image = image * mask
         supported_sizes = [518, 530]
-        if image.shape[2] not in supported_sizes or image.shape[3] not in supported_sizes:
+        if (image.shape[2] not in supported_sizes or image.shape[3] not in supported_sizes) and not self.has_guidance_embed:
             print(f'Image shape {image.shape} not supported. Resizing to 518x518')
             inputs = self.transform(image)
         else:
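
In plain terms, the encoder change gates the forced 518x518 resize on the new flag. Below is a simplified, self-contained sketch of that behaviour; the class name, the preprocess method, and the bilinear resize are stand-ins for the real ImageEncoder/self.transform, not code from this commit.

import torch
import torch.nn.functional as F

class ImageEncoderSketch:
    def __init__(self, **kwargs):
        # The new flag arrives via kwargs and defaults to False, so existing
        # configs keep the old resize behaviour.
        self.has_guidance_embed = kwargs.get('has_guidance_embed', False)

    def preprocess(self, image):
        supported_sizes = [518, 530]
        wrong_size = (image.shape[2] not in supported_sizes
                      or image.shape[3] not in supported_sizes)
        if wrong_size and not self.has_guidance_embed:
            # Normal model: force the input back to 518x518 before encoding.
            print(f'Image shape {image.shape} not supported. Resizing to 518x518')
            image = F.interpolate(image, size=(518, 518), mode='bilinear')
        # Fast (guidance-embed) model: larger inputs fall through untouched.
        return image

img = torch.zeros(1, 3, 1024, 1024)
print(ImageEncoderSketch(has_guidance_embed=True).preprocess(img).shape)  # stays 1024x1024
print(ImageEncoderSketch().preprocess(img).shape)                         # resized to 518x518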


@@ -187,6 +187,7 @@ class Hunyuan3DDiTPipeline:
         if "guidance_in.in_layer.bias" in ckpt['model']: #guidance_in.in_layer.bias
             logger.info("Model has guidance_in, setting guidance_embed to True")
             config['model']['params']['guidance_embed'] = True
+            config['conditioner']['params']['main_image_encoder']['kwargs']['has_guidance_embed'] = True
         config['model']['params']['attention_mode'] = attention_mode
         config['vae']['params']['attention_mode'] = attention_mode
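
The pipeline-side line presumably works by carrying the flag through the conditioner config into the encoder's constructor kwargs. The following is a hypothetical, self-contained sketch of that propagation (the config shape follows the diff above, but how the kwargs are ultimately splatted into ImageEncoder is an assumption, not code from this commit).

# Hypothetical sketch: flag set on the config when the checkpoint has guidance_in,
# then read by the encoder via kwargs.get('has_guidance_embed', False).
config = {
    'conditioner': {'params': {'main_image_encoder': {'kwargs': {}}}},
}

has_guidance_in = True  # i.e. "guidance_in.in_layer.bias" was found in ckpt['model']
if has_guidance_in:
    config['conditioner']['params']['main_image_encoder']['kwargs']['has_guidance_embed'] = True

encoder_kwargs = config['conditioner']['params']['main_image_encoder']['kwargs']
# Something like ImageEncoder(..., **encoder_kwargs) would then pick the flag up
# in __init__, which is what disables the 518x518 resize in the first file above.
print(encoder_kwargs)  # {'has_guidance_embed': True}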