Support mini models

kijai 2025-03-19 10:35:28 +02:00
parent d7a9ca06bd
commit 2253869d04
3 changed files with 87 additions and 7 deletions


@@ -0,0 +1,69 @@
model:
  target: .hy3dgen.shapegen.models.Hunyuan3DDiT
  params:
    in_channels: 64
    context_in_dim: 1536
    hidden_size: 1024
    mlp_ratio: 4.0
    num_heads: 16
    depth: 8
    depth_single_blocks: 16
    axes_dim: [ 64 ]
    theta: 10000
    qkv_bias: True
    guidance_embed: False

vae:
  target: .hy3dgen.shapegen.models.ShapeVAE
  params:
    num_latents: 512
    embed_dim: 64
    num_freqs: 8
    include_pi: false
    heads: 16
    width: 1024
    num_decoder_layers: 16
    qkv_bias: false
    qk_norm: true
    scale_factor: 1.0188137142395404

conditioner:
  target: .hy3dgen.shapegen.models.SingleImageEncoder
  params:
    main_image_encoder:
      type: DinoImageEncoder # dino giant
      kwargs:
        config:
          attention_probs_dropout_prob: 0.0
          drop_path_rate: 0.0
          hidden_act: gelu
          hidden_dropout_prob: 0.0
          hidden_size: 1536
          image_size: 518
          initializer_range: 0.02
          layer_norm_eps: 1.e-6
          layerscale_value: 1.0
          mlp_ratio: 4
          model_type: dinov2
          num_attention_heads: 24
          num_channels: 3
          num_hidden_layers: 40
          patch_size: 14
          qkv_bias: true
          torch_dtype: float32
          use_swiglu_ffn: true
        image_size: 518

scheduler:
  target: .hy3dgen.shapegen.schedulers.FlowMatchEulerDiscreteScheduler
  params:
    num_train_timesteps: 1000

image_processor:
  target: .hy3dgen.shapegen.preprocessors.ImageProcessorV2
  params:
    size: 512
    border_ratio: 0.15

pipeline:
  target: .hy3dgen.shapegen.pipelines.Hunyuan3DDiTFlowMatchingPipeline
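
The new config follows the same target/params layout as the existing dit_config.yaml: each section names an importable class and the keyword arguments used to construct it. As a rough illustration only (not the repo's actual loader; the helper name and dot-stripping below are assumptions), such an entry can be resolved like this:

import importlib

import yaml


def build_from_entry(entry, **extra_kwargs):
    # Hypothetical helper: turn a {"target": ..., "params": {...}} entry into an object.
    # The configs use a leading dot for package-relative targets; a real loader would
    # resolve that against its own package instead of simply stripping it as done here.
    module_name, class_name = entry["target"].lstrip(".").rsplit(".", 1)
    cls = getattr(importlib.import_module(module_name), class_name)
    return cls(**entry.get("params", {}), **extra_kwargs)


# Usage sketch:
# with open("configs/dit_config_mini.yaml") as f:
#     config = yaml.safe_load(f)
# model = build_from_entry(config["model"])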


@@ -147,7 +147,6 @@ class Hunyuan3DDiTPipeline:
     def from_single_file(
         cls,
         ckpt_path,
-        config_path,
         device='cuda',
         offload_device=torch.device('cpu'),
         dtype=torch.float16,
@@ -157,9 +156,6 @@ class Hunyuan3DDiTPipeline:
         cublas_ops=False,
         **kwargs,
     ):
-        # load config
-        with open(config_path, 'r') as f:
-            config = yaml.safe_load(f)
         # load ckpt
         if use_safetensors:
@@ -182,6 +178,23 @@ class Hunyuan3DDiTPipeline:
         else:
             ckpt = torch.load(ckpt_path, map_location='cpu')
+        script_directory = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+        # load config
+        single_block_nums = set()
+        for k in ckpt["model"].keys():
+            if k.startswith('single_blocks.'):
+                block_num = int(k.split('.')[1])
+                single_block_nums.add(block_num)
+        if len(single_block_nums) < 17:
+            config_path = os.path.join(script_directory, "configs", "dit_config_mini.yaml")
+            logger.info(f"Model has {len(single_block_nums)} single blocks, setting config to dit_config_mini.yaml")
+        else:
+            config_path = os.path.join(script_directory, "configs", "dit_config.yaml")
+        with open(config_path, 'r') as f:
+            config = yaml.safe_load(f)
         # load model
         if "guidance_in.in_layer.bias" in ckpt['model']: #guidance_in.in_layer.bias


@@ -125,11 +125,9 @@ class Hy3DModelLoader:
         device = mm.get_torch_device()
         offload_device=mm.unet_offload_device()
-        config_path = os.path.join(script_directory, "configs", "dit_config.yaml")
         model_path = folder_paths.get_full_path("diffusion_models", model)
         pipe, vae = Hunyuan3DDiTFlowMatchingPipeline.from_single_file(
             ckpt_path=model_path,
-            config_path=config_path,
             use_safetensors=True,
             device=device,
             offload_device=offload_device,