From 2253869d046b333bc6aea2f7ba587ce298073819 Mon Sep 17 00:00:00 2001
From: kijai <40791699+kijai@users.noreply.github.com>
Date: Wed, 19 Mar 2025 10:35:28 +0200
Subject: [PATCH] Support mini models

---
 configs/dit_config_mini.yaml  | 69 +++++++++++++++++++++++++++++++++++
 hy3dgen/shapegen/pipelines.py | 21 +++++++++--
 nodes.py                      |  4 +-
 3 files changed, 87 insertions(+), 7 deletions(-)
 create mode 100644 configs/dit_config_mini.yaml

diff --git a/configs/dit_config_mini.yaml b/configs/dit_config_mini.yaml
new file mode 100644
index 0000000..e0f8b19
--- /dev/null
+++ b/configs/dit_config_mini.yaml
@@ -0,0 +1,69 @@
+model:
+  target: .hy3dgen.shapegen.models.Hunyuan3DDiT
+  params:
+    in_channels: 64
+    context_in_dim: 1536
+    hidden_size: 1024
+    mlp_ratio: 4.0
+    num_heads: 16
+    depth: 8
+    depth_single_blocks: 16
+    axes_dim: [ 64 ]
+    theta: 10000
+    qkv_bias: True
+    guidance_embed: False
+
+vae:
+  target: .hy3dgen.shapegen.models.ShapeVAE
+  params:
+    num_latents: 512
+    embed_dim: 64
+    num_freqs: 8
+    include_pi: false
+    heads: 16
+    width: 1024
+    num_decoder_layers: 16
+    qkv_bias: false
+    qk_norm: true
+    scale_factor: 1.0188137142395404
+
+conditioner:
+  target: .hy3dgen.shapegen.models.SingleImageEncoder
+  params:
+    main_image_encoder:
+      type: DinoImageEncoder # DINOv2 giant (ViT-g/14)
+      kwargs:
+        config:
+          attention_probs_dropout_prob: 0.0
+          drop_path_rate: 0.0
+          hidden_act: gelu
+          hidden_dropout_prob: 0.0
+          hidden_size: 1536
+          image_size: 518
+          initializer_range: 0.02
+          layer_norm_eps: 1.e-6
+          layerscale_value: 1.0
+          mlp_ratio: 4
+          model_type: dinov2
+          num_attention_heads: 24
+          num_channels: 3
+          num_hidden_layers: 40
+          patch_size: 14
+          qkv_bias: true
+          torch_dtype: float32
+          use_swiglu_ffn: true
+        image_size: 518
+
+scheduler:
+  target: .hy3dgen.shapegen.schedulers.FlowMatchEulerDiscreteScheduler
+  params:
+    num_train_timesteps: 1000
+
+image_processor:
+  target: .hy3dgen.shapegen.preprocessors.ImageProcessorV2
+  params:
+    size: 512
+    border_ratio: 0.15
+
+pipeline:
+  target: .hy3dgen.shapegen.pipelines.Hunyuan3DDiTFlowMatchingPipeline
diff --git a/hy3dgen/shapegen/pipelines.py b/hy3dgen/shapegen/pipelines.py
index 31716aa..fa56912 100755
--- a/hy3dgen/shapegen/pipelines.py
+++ b/hy3dgen/shapegen/pipelines.py
@@ -147,7 +147,6 @@ class Hunyuan3DDiTPipeline:
     def from_single_file(
         cls,
         ckpt_path,
-        config_path,
         device='cuda',
         offload_device=torch.device('cpu'),
         dtype=torch.float16,
@@ -157,9 +156,6 @@ class Hunyuan3DDiTPipeline:
         cublas_ops=False,
         **kwargs,
     ):
-        # load config
-        with open(config_path, 'r') as f:
-            config = yaml.safe_load(f)
 
         # load ckpt
         if use_safetensors:
@@ -182,6 +178,23 @@ class Hunyuan3DDiTPipeline:
         else:
             ckpt = torch.load(ckpt_path, map_location='cpu')
 
+        script_directory = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+        # choose the config from the checkpoint's single-block count, then load it
+
+        single_block_nums = set()
+        for k in ckpt["model"].keys():
+            if k.startswith('single_blocks.'):
+                block_num = int(k.split('.')[1])
+                single_block_nums.add(block_num)
+    
+        if len(single_block_nums) < 17:
+            config_path = os.path.join(script_directory, "configs", "dit_config_mini.yaml")
+            logger.info(f"Model has {len(single_block_nums)} single blocks, setting config to dit_config_mini.yaml")
+        else:
+            config_path = os.path.join(script_directory, "configs", "dit_config.yaml")
+        with open(config_path, 'r') as f:
+            config = yaml.safe_load(f)
+
         
         # load model
         if "guidance_in.in_layer.bias" in ckpt['model']: #guidance_in.in_layer.bias
diff --git a/nodes.py b/nodes.py
index c404f1f..f6538d0 100644
--- a/nodes.py
+++ b/nodes.py
@@ -125,11 +125,9 @@ class Hy3DModelLoader:
         device = mm.get_torch_device()
         offload_device=mm.unet_offload_device()
 
-        config_path = os.path.join(script_directory, "configs", "dit_config.yaml")
         model_path = folder_paths.get_full_path("diffusion_models", model)
         pipe, vae = Hunyuan3DDiTFlowMatchingPipeline.from_single_file(
-            ckpt_path=model_path, 
-            config_path=config_path, 
+            ckpt_path=model_path,  
             use_safetensors=True, 
             device=device, 
             offload_device=offload_device,