diff --git a/hy3dgen/shapegen/models/autoencoders/volume_decoders.py b/hy3dgen/shapegen/models/autoencoders/volume_decoders.py
index d7bfd84..2f4fef7 100644
--- a/hy3dgen/shapegen/models/autoencoders/volume_decoders.py
+++ b/hy3dgen/shapegen/models/autoencoders/volume_decoders.py
@@ -25,6 +25,7 @@ from .attention_blocks import CrossAttentionDecoder
 from .attention_processors import FlashVDMCrossAttentionProcessor, FlashVDMTopMCrossAttentionProcessor
 from ...utils import logger
+from comfy.utils import ProgressBar
 
 
 def extract_near_surface_volume_fn(input_tensor: torch.Tensor, alpha: float):
     device = input_tensor.device
@@ -318,7 +319,7 @@ class FlashVDMVolumeDecoding:
         for i, resolution in enumerate(resolutions[1:]):
             resolutions[i + 1] = resolutions[0] * 2 ** (i + 1)
 
-        logger.info(f"FlashVDMVolumeDecoding Resolution: {resolutions}")
+        logger.debug(f"FlashVDMVolumeDecoding Resolution: {resolutions}")
 
         # 1. generate query points
         if isinstance(bounds, float):
@@ -354,6 +355,7 @@ class FlashVDMVolumeDecoding:
             )
             batch_logits = []
             num_batchs = max(num_chunks // xyz_samples.shape[1], 1)
+            comfy_pbar = ProgressBar(xyz_samples.shape[0])  # total = rows of xyz_samples consumed by the loop below
             for start in tqdm(range(0, xyz_samples.shape[0], num_batchs),
                               desc=f"FlashVDM Volume Decoding", disable=not enable_pbar):
                 queries = xyz_samples[start: start + num_batchs, :]
@@ -362,12 +364,14 @@ class FlashVDMVolumeDecoding:
                     processor.topk = True
                 logits = geo_decoder(queries=queries, latents=batch_latents)
                 batch_logits.append(logits)
+                comfy_pbar.update(queries.shape[0])  # advance by the rows actually processed; the last batch may be short
+
             grid_logits = torch.cat(batch_logits, dim=0).reshape(
                 mini_grid_num, mini_grid_num, mini_grid_num,
                 mini_grid_size, mini_grid_size, mini_grid_size
             ).permute(0, 3, 1, 4, 2, 5).contiguous().view(
                 (batch_size, grid_size[0], grid_size[1], grid_size[2])
             )
 
             for octree_depth_now in resolutions[1:]:
                 grid_size = np.array([octree_depth_now + 1] * 3)
diff --git a/hy3dgen/shapegen/pipelines.py b/hy3dgen/shapegen/pipelines.py
index 462dd28..5143ac2 100755
--- a/hy3dgen/shapegen/pipelines.py
+++ b/hy3dgen/shapegen/pipelines.py
@@ -206,8 +206,8 @@ class Hunyuan3DDiTPipeline:
         config['model']['params']['attention_mode'] = attention_mode
         #config['vae']['params']['attention_mode'] = attention_mode
 
-        if cublas_ops:
-            config['vae']['params']['cublas_ops'] = True
+        #if cublas_ops:
+        #    config['vae']['params']['cublas_ops'] = True
 
         with init_empty_weights():
             model = instantiate_from_config(config['model'])
diff --git a/nodes.py b/nodes.py
index f39093d..e88dd9b 100644
--- a/nodes.py
+++ b/nodes.py
@@ -1086,6 +1086,7 @@ class Hy3DGenerateMesh:
             "optional": {
                 "mask": ("MASK", ),
                 "scheduler": (["FlowMatchEulerDiscreteScheduler", "ConsistencyFlowMatchEulerDiscreteScheduler"],),
+                "force_offload": ("BOOLEAN", {"default": True, "tooltip": "Offloads the model to the offload device once the process is done."}),
             }
         }
 
@@ -1094,7 +1095,8 @@ class Hy3DGenerateMesh:
     FUNCTION = "process"
     CATEGORY = "Hunyuan3DWrapper"
 
-    def process(self, pipeline, image, steps, guidance_scale, seed, mask=None, front=None, back=None, left=None, right=None, scheduler="FlowMatchEulerDiscreteScheduler"):
+    def process(self, pipeline, image, steps, guidance_scale, seed, mask=None, front=None, back=None, left=None, right=None,
+                scheduler="FlowMatchEulerDiscreteScheduler", force_offload=True):
         mm.unload_all_models()
         mm.soft_empty_cache()
 
@@ -1136,7 +1138,8 @@ class Hy3DGenerateMesh:
             torch.cuda.reset_peak_memory_stats(device)
         except:
             pass
 
-        pipeline.to(offload_device)
+        if force_offload:
+            pipeline.to(offload_device)
 
         return (latents, )
@@ -1254,6 +1257,7 @@ class Hy3DVAEDecode:
             },
             "optional": {
                 "enable_flash_vdm": ("BOOLEAN", {"default": True}),
+                "force_offload": ("BOOLEAN", {"default": True, "tooltip": "Offloads the model to the offload device once the process is done."}),
             }
         }
 
@@ -1262,7 +1266,7 @@ class Hy3DVAEDecode:
     FUNCTION = "process"
     CATEGORY = "Hunyuan3DWrapper"
 
-    def process(self, vae, latents, box_v, octree_resolution, mc_level, num_chunks, mc_algo, enable_flash_vdm=True):
+    def process(self, vae, latents, box_v, octree_resolution, mc_level, num_chunks, mc_algo, enable_flash_vdm=True, force_offload=True):
         device = mm.get_torch_device()
         offload_device = mm.unet_offload_device()
 
@@ -1283,7 +1287,8 @@ class Hy3DVAEDecode:
                 octree_resolution=octree_resolution,
                 mc_algo=mc_algo,
             )[0]
-        vae.to(offload_device)
+        if force_offload:
+            vae.to(offload_device)
 
         outputs.mesh_f = outputs.mesh_f[:, ::-1]
         mesh_output = Trimesh.Trimesh(outputs.mesh_v, outputs.mesh_f)
diff --git a/pyproject.toml b/pyproject.toml
index b4da30a..c7402c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "comfyui-hunyan3dwrapper"
 description = "Wrapper nodes for https://github.com/Tencent/Hunyuan3D-2, additional installation steps needed, please check the github repository"
-version = "1.0.4"
+version = "1.0.5"
 license = {file = "LICENSE"}
 dependencies = ["trimesh", "diffusers>=0.31.0","accelerate","huggingface_hub","einops","opencv-python","transformers","xatlas","pymeshlab","pygltflib","scikit-learn","scikit-image","pybind11"]