diff --git a/example_workflows/example_workflow.png b/example_workflows/example_workflow.png index b764973..fe26d08 100644 Binary files a/example_workflows/example_workflow.png and b/example_workflows/example_workflow.png differ diff --git a/example_workflows/hy3dtest.json b/example_workflows/hy3d_example_01.json similarity index 90% rename from example_workflows/hy3dtest.json rename to example_workflows/hy3d_example_01.json index 58349cc..827ca17 100644 --- a/example_workflows/hy3dtest.json +++ b/example_workflows/hy3d_example_01.json @@ -1,59 +1,7 @@ { - "last_node_id": 136, - "last_link_id": 241, + "last_node_id": 143, + "last_link_id": 261, "nodes": [ - { - "id": 57, - "type": "ImageCompositeMasked", - "pos": [ - -164.20501708984375, - -573.5294189453125 - ], - "size": [ - 315, - 146 - ], - "flags": {}, - "order": 16, - "mode": 0, - "inputs": [ - { - "name": "destination", - "type": "IMAGE", - "link": 77 - }, - { - "name": "source", - "type": "IMAGE", - "link": 239 - }, - { - "name": "mask", - "type": "MASK", - "link": 228, - "shape": 7 - } - ], - "outputs": [ - { - "name": "IMAGE", - "type": "IMAGE", - "links": [ - 80, - 81 - ], - "slot_index": 0 - } - ], - "properties": { - "Node name for S&R": "ImageCompositeMasked" - }, - "widgets_values": [ - 0, - 0, - false - ] - }, { "id": 28, "type": "DownloadAndLoadHy3DDelightModel", @@ -86,150 +34,6 @@ "hunyuan3d-delight-v2-0" ] }, - { - "id": 58, - "type": "EmptyImage", - "pos": [ - -158.4535369873047, - -755.59521484375 - ], - "size": [ - 315, - 130 - ], - "flags": {}, - "order": 1, - "mode": 0, - "inputs": [], - "outputs": [ - { - "name": "IMAGE", - "type": "IMAGE", - "links": [ - 77 - ], - "slot_index": 0 - } - ], - "title": "EmptyImage: black", - "properties": { - "Node name for S&R": "EmptyImage" - }, - "widgets_values": [ - 512, - 512, - 1, - 0 - ] - }, - { - "id": 55, - "type": "TransparentBGSession+", - "pos": [ - -1127.7001953125, - -384.712646484375 - ], - "size": [ - 340.20001220703125, - 82 - ], - "flags": {}, - "order": 2, - "mode": 0, - "inputs": [], - "outputs": [ - { - "name": "REMBG_SESSION", - "type": "REMBG_SESSION", - "links": [ - 74 - ], - "slot_index": 0 - } - ], - "properties": { - "Node name for S&R": "TransparentBGSession+" - }, - "widgets_values": [ - "base", - true - ] - }, - { - "id": 53, - "type": "PreviewImage", - "pos": [ - 209.58465576171875, - -570.438232421875 - ], - "size": [ - 268.55645751953125, - 318.0697021484375 - ], - "flags": {}, - "order": 18, - "mode": 0, - "inputs": [ - { - "name": "images", - "type": "IMAGE", - "link": 80 - } - ], - "outputs": [], - "properties": { - "Node name for S&R": "PreviewImage" - }, - "widgets_values": [] - }, - { - "id": 59, - "type": "Hy3DPostprocessMesh", - "pos": [ - 173.0716094970703, - 38.68581771850586 - ], - "size": [ - 315, - 150 - ], - "flags": {}, - "order": 21, - "mode": 0, - "inputs": [ - { - "name": "mesh", - "type": "HY3DMESH", - "link": 85 - }, - { - "name": "mask", - "type": "MASK", - "link": null, - "shape": 7 - } - ], - "outputs": [ - { - "name": "mesh", - "type": "HY3DMESH", - "links": [ - 86, - 133 - ], - "slot_index": 0 - } - ], - "properties": { - "Node name for S&R": "Hy3DPostprocessMesh" - }, - "widgets_values": [ - true, - true, - true, - 50000 - ] - }, { "id": 64, "type": "ImageCompositeMasked", @@ -242,7 +46,7 @@ 146 ], "flags": {}, - "order": 20, + "order": 22, "mode": 0, "inputs": [ { @@ -293,7 +97,7 @@ 26 ], "flags": {}, - "order": 24, + "order": 26, "mode": 0, "inputs": [ { @@ -329,7 +133,7 @@ 113.29772186279297 ], "flags": {}, - "order": 
3, + "order": 1, "mode": 0, "inputs": [], "outputs": [ @@ -361,7 +165,7 @@ 119.78506469726562 ], "flags": {}, - "order": 4, + "order": 2, "mode": 0, "inputs": [], "outputs": [], @@ -384,7 +188,7 @@ 170 ], "flags": {}, - "order": 27, + "order": 29, "mode": 0, "inputs": [ { @@ -446,7 +250,7 @@ 375.8153991699219 ], "flags": {}, - "order": 30, + "order": 32, "mode": 0, "inputs": [ { @@ -473,7 +277,7 @@ 58 ], "flags": {}, - "order": 5, + "order": 3, "mode": 2, "inputs": [], "outputs": [ @@ -504,7 +308,7 @@ 218 ], "flags": {}, - "order": 32, + "order": 34, "mode": 0, "inputs": [ { @@ -557,7 +361,7 @@ 46 ], "flags": {}, - "order": 39, + "order": 41, "mode": 0, "inputs": [ { @@ -598,7 +402,7 @@ 520.6934204101562 ], "flags": {}, - "order": 40, + "order": 42, "mode": 0, "inputs": [ { @@ -625,7 +429,7 @@ 102 ], "flags": {}, - "order": 38, + "order": 40, "mode": 0, "inputs": [ { @@ -670,7 +474,7 @@ 523.9635620117188 ], "flags": {}, - "order": 37, + "order": 39, "mode": 0, "inputs": [ { @@ -697,7 +501,7 @@ 521.7835083007812 ], "flags": {}, - "order": 35, + "order": 37, "mode": 0, "inputs": [ { @@ -724,7 +528,7 @@ 125.7635726928711 ], "flags": {}, - "order": 6, + "order": 4, "mode": 0, "inputs": [], "outputs": [], @@ -747,7 +551,7 @@ 46 ], "flags": {}, - "order": 33, + "order": 35, "mode": 2, "inputs": [ { @@ -785,7 +589,7 @@ 58 ], "flags": {}, - "order": 7, + "order": 5, "mode": 0, "inputs": [], "outputs": [ @@ -804,97 +608,6 @@ "hunyuan3d-paint-v2-0" ] }, - { - "id": 17, - "type": "Hy3DExportMesh", - "pos": [ - 172.91156005859375, - -119.49565887451172 - ], - "size": [ - 315.6768493652344, - 58.98750305175781 - ], - "flags": {}, - "order": 23, - "mode": 0, - "inputs": [ - { - "name": "mesh", - "type": "HY3DMESH", - "link": 86 - } - ], - "outputs": [ - { - "name": "glb_path", - "type": "STRING", - "links": [ - 94 - ], - "slot_index": 0 - } - ], - "properties": { - "Node name for S&R": "Hy3DExportMesh" - }, - "widgets_values": [ - "3D/Hy3D" - ] - }, - { - "id": 12, - "type": "Hy3DGenerateMesh", - "pos": [ - -170.76661682128906, - -25.604846954345703 - ], - "size": [ - 301.896484375, - 198.27505493164062 - ], - "flags": {}, - "order": 19, - "mode": 0, - "inputs": [ - { - "name": "pipeline", - "type": "HY3DMODEL", - "link": 11 - }, - { - "name": "image", - "type": "IMAGE", - "link": 81 - }, - { - "name": "mask", - "type": "MASK", - "link": 229, - "shape": 7 - } - ], - "outputs": [ - { - "name": "mesh", - "type": "HY3DMESH", - "links": [ - 85 - ], - "slot_index": 0 - } - ], - "properties": { - "Node name for S&R": "Hy3DGenerateMesh" - }, - "widgets_values": [ - 256, - 5.5, - 50, - 32, - "fixed" - ] - }, { "id": 13, "type": "LoadImage", @@ -907,7 +620,7 @@ 314.0000305175781 ], "flags": {}, - "order": 8, + "order": 6, "mode": 0, "inputs": [], "outputs": [ @@ -946,7 +659,7 @@ 222 ], "flags": {}, - "order": 22, + "order": 24, "mode": 0, "inputs": [ { @@ -996,7 +709,7 @@ 82 ], "flags": {}, - "order": 41, + "order": 43, "mode": 0, "inputs": [ { @@ -1034,7 +747,7 @@ 1672.490234375 ], "flags": {}, - "order": 42, + "order": 44, "mode": 0, "inputs": [ { @@ -1095,7 +808,8 @@ ], "properties": { "Node name for S&R": "MaskToImage" - } + }, + "widgets_values": [] }, { "id": 73, @@ -1109,7 +823,7 @@ 99.84209442138672 ], "flags": {}, - "order": 9, + "order": 7, "mode": 0, "inputs": [], "outputs": [], @@ -1120,44 +834,6 @@ "color": "#432", "bgcolor": "#653" }, - { - "id": 63, - "type": "Preview3D", - "pos": [ - 725.4800415039062, - -605.6441650390625 - ], - "size": [ - 985.0338745117188, - 1102.6461181640625 - ], - "flags": 
{}, - "order": 26, - "mode": 0, - "inputs": [ - { - "name": "model_file", - "type": "STRING", - "link": 94, - "widget": { - "name": "model_file" - } - } - ], - "outputs": [], - "properties": { - "Node name for S&R": "Preview3D" - }, - "widgets_values": [ - "3D/Hy3D_00186_.glb", - "normal", - "#000000", - 10, - "original", - 75, - null - ] - }, { "id": 132, "type": "SolidMask", @@ -1170,7 +846,7 @@ 106 ], "flags": {}, - "order": 10, + "order": 8, "mode": 0, "inputs": [], "outputs": [ @@ -1204,7 +880,7 @@ 396.4273376464844 ], "flags": {}, - "order": 25, + "order": 27, "mode": 0, "inputs": [ { @@ -1231,7 +907,7 @@ 375.8153991699219 ], "flags": {}, - "order": 28, + "order": 30, "mode": 0, "inputs": [ { @@ -1258,7 +934,7 @@ 210 ], "flags": {}, - "order": 29, + "order": 31, "mode": 0, "inputs": [ { @@ -1322,7 +998,7 @@ 562.7461547851562 ], "flags": {}, - "order": 31, + "order": 33, "mode": 0, "inputs": [ { @@ -1349,7 +1025,7 @@ 66 ], "flags": {}, - "order": 34, + "order": 36, "mode": 0, "inputs": [ { @@ -1361,6 +1037,12 @@ "name": "renderer", "type": "MESHRENDER", "link": 151 + }, + { + "name": "camera_config", + "type": "HY3DCAMERA", + "link": null, + "shape": 7 } ], "outputs": [ @@ -1407,7 +1089,7 @@ 66 ], "flags": {}, - "order": 36, + "order": 38, "mode": 0, "inputs": [ { @@ -1455,110 +1137,6 @@ }, "widgets_values": [] }, - { - "id": 56, - "type": "ImageRemoveBackground+", - "pos": [ - -732.384521484375, - -384.8027038574219 - ], - "size": [ - 327.5999755859375, - 46 - ], - "flags": {}, - "order": 14, - "mode": 0, - "inputs": [ - { - "name": "rembg_session", - "type": "REMBG_SESSION", - "link": 74 - }, - { - "name": "image", - "type": "IMAGE", - "link": 238 - } - ], - "outputs": [ - { - "name": "IMAGE", - "type": "IMAGE", - "links": [], - "slot_index": 0 - }, - { - "name": "MASK", - "type": "MASK", - "links": [ - 228, - 229, - 232 - ], - "slot_index": 1 - } - ], - "properties": { - "Node name for S&R": "ImageRemoveBackground+" - }, - "widgets_values": [] - }, - { - "id": 52, - "type": "ImageResize+", - "pos": [ - -768.0399780273438, - -784.8458862304688 - ], - "size": [ - 315, - 218 - ], - "flags": {}, - "order": 12, - "mode": 0, - "inputs": [ - { - "name": "image", - "type": "IMAGE", - "link": 69 - } - ], - "outputs": [ - { - "name": "IMAGE", - "type": "IMAGE", - "links": [ - 238, - 239, - 240 - ], - "slot_index": 0 - }, - { - "name": "width", - "type": "INT", - "links": null - }, - { - "name": "height", - "type": "INT", - "links": null - } - ], - "properties": { - "Node name for S&R": "ImageResize+" - }, - "widgets_values": [ - 512, - 512, - "lanczos", - "pad", - "always", - 0 - ] - }, { "id": 136, "type": "Reroute", @@ -1571,7 +1149,7 @@ 26 ], "flags": {}, - "order": 15, + "order": 14, "mode": 0, "inputs": [ { @@ -1607,13 +1185,13 @@ 26 ], "flags": {}, - "order": 17, + "order": 20, "mode": 0, "inputs": [ { "name": "", "type": "*", - "link": 232 + "link": 261 } ], "outputs": [ @@ -1631,16 +1209,99 @@ "horizontal": false } }, + { + "id": 55, + "type": "TransparentBGSession+", + "pos": [ + -312.4496154785156, + -786.0094604492188 + ], + "size": [ + 340.20001220703125, + 82 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "REMBG_SESSION", + "type": "REMBG_SESSION", + "links": [ + 74 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "TransparentBGSession+" + }, + "widgets_values": [ + "base", + true + ] + }, + { + "id": 138, + "type": "MaskPreview+", + "pos": [ + 138.2086639404297, + -621.7325439453125 + ], + "size": [ + 210, + 
246 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "name": "mask", + "type": "MASK", + "link": 244 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "MaskPreview+" + }, + "widgets_values": [] + }, + { + "id": 137, + "type": "Note", + "pos": [ + -450, + -300 + ], + "size": [ + 312.0663146972656, + 86.12521362304688 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "https://huggingface.co/Kijai/Hunyuan3D-2_safetensors/blob/main/hunyuan3d-dit-v2-0-fp16.safetensors" + ], + "color": "#432", + "bgcolor": "#653" + }, { "id": 10, "type": "Hy3DModelLoader", "pos": [ - -247.57777404785156, - 227.78564453125 + -460, + -150 ], "size": [ 372.8913269042969, - 58.52665328979492 + 102 ], "flags": {}, "order": 11, @@ -1658,28 +1319,428 @@ "name": "pipeline", "type": "HY3DMODEL", "links": [ - 11 + 252 ], "slot_index": 0 + }, + { + "name": "vae", + "type": "HY3DVAE", + "links": [ + 250 + ], + "slot_index": 1 } ], "properties": { "Node name for S&R": "Hy3DModelLoader" }, "widgets_values": [ - "hy3dgen\\hunyuan3d-dit-v2-0-fp16.safetensors" + "hy3dgen\\hunyuan3d-dit-v2-0-fp16.safetensors", + "sdpa" + ] + }, + { + "id": 140, + "type": "Hy3DVAEDecode", + "pos": [ + -35.7520866394043, + -127.73638153076172 + ], + "size": [ + 315, + 174 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "HY3DVAE", + "link": 250 + }, + { + "name": "latents", + "type": "HY3DLATENT", + "link": 255 + } + ], + "outputs": [ + { + "name": "mesh", + "type": "HY3DMESH", + "links": [ + 251 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "Hy3DVAEDecode" + }, + "widgets_values": [ + 1.01, + 384, + 8000, + 0, + "mc" + ] + }, + { + "id": 17, + "type": "Hy3DExportMesh", + "pos": [ + 308.6851806640625, + -259.24041748046875 + ], + "size": [ + 315.6768493652344, + 58.98750305175781 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "name": "mesh", + "type": "HY3DMESH", + "link": 86 + } + ], + "outputs": [ + { + "name": "glb_path", + "type": "STRING", + "links": [ + 94 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "Hy3DExportMesh" + }, + "widgets_values": [ + "3D/Hy3D" + ] + }, + { + "id": 59, + "type": "Hy3DPostprocessMesh", + "pos": [ + 301.78021240234375, + -124.21797180175781 + ], + "size": [ + 315, + 174 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "name": "mesh", + "type": "HY3DMESH", + "link": 251 + }, + { + "name": "mask", + "type": "MASK", + "link": null, + "shape": 7 + } + ], + "outputs": [ + { + "name": "mesh", + "type": "HY3DMESH", + "links": [ + 86, + 133 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "Hy3DPostprocessMesh" + }, + "widgets_values": [ + true, + true, + true, + 50000, + false + ] + }, + { + "id": 141, + "type": "Hy3DGenerateMesh", + "pos": [ + -430, + 5.956213474273682 + ], + "size": [ + 315, + 170 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "name": "pipeline", + "type": "HY3DMODEL", + "link": 252 + }, + { + "name": "image", + "type": "IMAGE", + "link": 258 + }, + { + "name": "mask", + "type": "MASK", + "link": 260, + "shape": 7 + } + ], + "outputs": [ + { + "name": "latents", + "type": "HY3DLATENT", + "links": [ + 255 + ] + } + ], + "properties": { + "Node name for S&R": "Hy3DGenerateMesh" + }, + "widgets_values": [ + 5.5, + 50, + 123, + "fixed" + ] + }, + { + "id": 142, + "type": "Reroute", + "pos": [ + -649.8002319335938, + 
-400.6730651855469 + ], + "size": [ + 75, + 26 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "", + "type": "*", + "link": 256 + } + ], + "outputs": [ + { + "name": "", + "type": "IMAGE", + "links": [ + 257, + 258 + ], + "slot_index": 0 + } + ], + "properties": { + "showOutputText": false, + "horizontal": false + } + }, + { + "id": 56, + "type": "ImageRemoveBackground+", + "pos": [ + -316.1974182128906, + -643.1515502929688 + ], + "size": [ + 327.5999755859375, + 46 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "rembg_session", + "type": "REMBG_SESSION", + "link": 74 + }, + { + "name": "image", + "type": "IMAGE", + "link": 257 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": [ + 244, + 259 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "ImageRemoveBackground+" + }, + "widgets_values": [] + }, + { + "id": 143, + "type": "Reroute", + "pos": [ + -699.34765625, + -22.30767250061035 + ], + "size": [ + 75, + 26 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "name": "", + "type": "*", + "link": 259 + } + ], + "outputs": [ + { + "name": "", + "type": "MASK", + "links": [ + 260, + 261 + ], + "slot_index": 0 + } + ], + "properties": { + "showOutputText": false, + "horizontal": false + } + }, + { + "id": 52, + "type": "ImageResize+", + "pos": [ + -1104.0650634765625, + -401.0750427246094 + ], + "size": [ + 315, + 218 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 69 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 240, + 256 + ], + "slot_index": 0 + }, + { + "name": "width", + "type": "INT", + "links": null + }, + { + "name": "height", + "type": "INT", + "links": null + } + ], + "properties": { + "Node name for S&R": "ImageResize+" + }, + "widgets_values": [ + 518, + 518, + "lanczos", + "pad", + "always", + 2 + ] + }, + { + "id": 63, + "type": "Preview3D", + "pos": [ + 725.4800415039062, + -605.6441650390625 + ], + "size": [ + 985.0338745117188, + 1102.6461181640625 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "name": "model_file", + "type": "STRING", + "link": 94, + "widget": { + "name": "model_file" + } + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "Preview3D" + }, + "widgets_values": [ + "3D/Hy3D_00372_.glb", + "normal", + "#000000", + 10, + "original", + 75, + null ] } ], "links": [ - [ - 11, - 10, - 0, - 12, - 0, - "HY3DMODEL" - ], [ 30, 28, @@ -1712,38 +1773,6 @@ 0, "REMBG_SESSION" ], - [ - 77, - 58, - 0, - 57, - 0, - "IMAGE" - ], - [ - 80, - 57, - 0, - 53, - 0, - "IMAGE" - ], - [ - 81, - 57, - 0, - 12, - 1, - "IMAGE" - ], - [ - 85, - 12, - 0, - 59, - 0, - "HY3DMESH" - ], [ 86, 59, @@ -1992,22 +2021,6 @@ 0, "IMAGE" ], - [ - 228, - 56, - 1, - 57, - 2, - "MASK" - ], - [ - 229, - 56, - 1, - 12, - 2, - "MASK" - ], [ 231, 135, @@ -2016,30 +2029,6 @@ 2, "MASK" ], - [ - 232, - 56, - 1, - 135, - 0, - "*" - ], - [ - 238, - 52, - 0, - 56, - 1, - "IMAGE" - ], - [ - 239, - 52, - 0, - 57, - 1, - "IMAGE" - ], [ 240, 52, @@ -2055,6 +2044,94 @@ 64, 1, "IMAGE" + ], + [ + 244, + 56, + 1, + 138, + 0, + "MASK" + ], + [ + 250, + 10, + 1, + 140, + 0, + "HY3DVAE" + ], + [ + 251, + 140, + 0, + 59, + 0, + "HY3DMESH" + ], + [ + 252, + 10, + 0, + 141, + 0, + "HY3DMODEL" + ], + [ + 255, + 141, + 0, + 140, + 1, + "HY3DLATENT" + ], + [ + 256, + 52, + 0, + 142, + 0, + "*" + ], 
+ [ + 257, + 142, + 0, + 56, + 1, + "IMAGE" + ], + [ + 258, + 142, + 0, + 141, + 1, + "IMAGE" + ], + [ + 259, + 56, + 1, + 143, + 0, + "*" + ], + [ + 260, + 143, + 0, + 141, + 2, + "MASK" + ], + [ + 261, + 143, + 0, + 135, + 0, + "*" ] ], "groups": [ @@ -2114,15 +2191,15 @@ "config": {}, "extra": { "ds": { - "scale": 0.41772481694158353, + "scale": 0.3797498335832583, "offset": [ - 1359.4285962911417, - 714.972673566092 + 1110.9451208360776, + 845.591364208545 ] }, "node_versions": { + "ComfyUI-Hunyuan3DWrapper": "7e234a017e9191b97f0a5096b7eafaeac54791fe", "comfy-core": "0.3.12", - "ComfyUI-Hunyuan3DWrapper": "d79855cbd9cffa9898369af3e472d073fad8708c", "ComfyUI_essentials": "76e9d1e4399bd025ce8b12c290753d58f9f53e93" }, "VHS_latentpreview": true, diff --git a/hy3dgen/shapegen/pipelines.py b/hy3dgen/shapegen/pipelines.py index 4435bf2..dc706c7 100755 --- a/hy3dgen/shapegen/pipelines.py +++ b/hy3dgen/shapegen/pipelines.py @@ -212,7 +212,7 @@ class Hunyuan3DDiTPipeline: vae = torch.compile(vae) model_kwargs = dict( - vae=vae, + #vae=vae, model=model, scheduler=scheduler, conditioner=conditioner, @@ -223,56 +223,54 @@ class Hunyuan3DDiTPipeline: ) model_kwargs.update(kwargs) - return cls( - **model_kwargs - ) + return cls(**model_kwargs), vae - @classmethod - def from_pretrained( - cls, - model_path, - ckpt_name='model.ckpt', - config_name='config.yaml', - device='cuda', - dtype=torch.float16, - use_safetensors=None, - **kwargs, - ): - original_model_path = model_path - if not os.path.exists(model_path): - # try local path - base_dir = "checkpoints" - model_path = os.path.join(base_dir, model_path, 'hunyuan3d-dit-v2-0') - if not os.path.exists(model_path): - try: - import huggingface_hub - # download from huggingface - huggingface_hub.snapshot_download( - repo_id="tencent/Hunyuan3D-2", - local_dir=base_dir,) + # @classmethod + # def from_pretrained( + # cls, + # model_path, + # ckpt_name='model.ckpt', + # config_name='config.yaml', + # device='cuda', + # dtype=torch.float16, + # use_safetensors=None, + # **kwargs, + # ): + # original_model_path = model_path + # if not os.path.exists(model_path): + # # try local path + # base_dir = "checkpoints" + # model_path = os.path.join(base_dir, model_path, 'hunyuan3d-dit-v2-0') + # if not os.path.exists(model_path): + # try: + # import huggingface_hub + # # download from huggingface + # huggingface_hub.snapshot_download( + # repo_id="tencent/Hunyuan3D-2", + # local_dir=base_dir,) - except ImportError: - logger.warning( - "You need to install HuggingFace Hub to load models from the hub." - ) - raise RuntimeError(f"Model path {model_path} not found") - if not os.path.exists(model_path): - raise FileNotFoundError(f"Model path {original_model_path} not found") + # except ImportError: + # logger.warning( + # "You need to install HuggingFace Hub to load models from the hub." 
+ # ) + # raise RuntimeError(f"Model path {model_path} not found") + # if not os.path.exists(model_path): + # raise FileNotFoundError(f"Model path {original_model_path} not found") - config_path = os.path.join(model_path, config_name) - ckpt_path = os.path.join(model_path, ckpt_name) - return cls.from_single_file( - ckpt_path, - config_path, - device=device, - dtype=dtype, - use_safetensors=use_safetensors, - **kwargs - ) + # config_path = os.path.join(model_path, config_name) + # ckpt_path = os.path.join(model_path, ckpt_name) + # return cls.from_single_file( + # ckpt_path, + # config_path, + # device=device, + # dtype=dtype, + # use_safetensors=use_safetensors, + # **kwargs + # ) def __init__( self, - vae, + #vae, model, scheduler, conditioner, @@ -282,7 +280,7 @@ class Hunyuan3DDiTPipeline: dtype=torch.float16, **kwargs ): - self.vae = vae + #self.vae = vae self.model = model self.scheduler = scheduler self.conditioner = conditioner @@ -295,12 +293,12 @@ class Hunyuan3DDiTPipeline: def to(self, device=None, dtype=None): if device is not None: - self.vae.to(device) + #self.vae.to(device) self.model.to(device) self.conditioner.to(device) if dtype is not None: self.dtype = dtype - self.vae.to(dtype=dtype) + #self.vae.to(dtype=dtype) self.model.to(dtype=dtype) self.conditioner.to(dtype=dtype) @@ -359,7 +357,10 @@ class Hunyuan3DDiTPipeline: return extra_step_kwargs def prepare_latents(self, batch_size, dtype, device, generator, latents=None): - shape = (batch_size, *self.vae.latent_shape) + #shape = (batch_size, *self.vae.latent_shape) + num_latents = 3072 + embed_dim = 64 + shape = (batch_size, num_latents, embed_dim) if isinstance(generator, list) and len(generator) != batch_size: raise ValueError( f"You have passed a list of generators of length {len(generator)}, but requested an effective batch" @@ -423,126 +424,126 @@ class Hunyuan3DDiTPipeline: assert emb.shape == (w.shape[0], embedding_dim) return emb - @torch.no_grad() - def __call__( - self, - image: Union[str, List[str], Image.Image] = None, - num_inference_steps: int = 50, - timesteps: List[int] = None, - sigmas: List[float] = None, - eta: float = 0.0, - guidance_scale: float = 7.5, - dual_guidance_scale: float = 10.5, - dual_guidance: bool = True, - generator=None, - box_v=1.01, - octree_resolution=384, - mc_level=-1 / 512, - num_chunks=8000, - mc_algo='mc', - output_type: Optional[str] = "trimesh", - enable_pbar=True, - **kwargs, - ) -> List[List[trimesh.Trimesh]]: - callback = kwargs.pop("callback", None) - callback_steps = kwargs.pop("callback_steps", None) + # @torch.no_grad() + # def __call__( + # self, + # image: Union[str, List[str], Image.Image] = None, + # num_inference_steps: int = 50, + # timesteps: List[int] = None, + # sigmas: List[float] = None, + # eta: float = 0.0, + # guidance_scale: float = 7.5, + # dual_guidance_scale: float = 10.5, + # dual_guidance: bool = True, + # generator=None, + # box_v=1.01, + # octree_resolution=384, + # mc_level=-1 / 512, + # num_chunks=8000, + # mc_algo='mc', + # output_type: Optional[str] = "trimesh", + # enable_pbar=True, + # **kwargs, + # ) -> List[List[trimesh.Trimesh]]: + # callback = kwargs.pop("callback", None) + # callback_steps = kwargs.pop("callback_steps", None) - device = self.main_device - dtype = self.dtype - do_classifier_free_guidance = guidance_scale >= 0 and \ - getattr(self.model, 'guidance_cond_proj_dim', None) is None - dual_guidance = dual_guidance_scale >= 0 and dual_guidance + # device = self.main_device + # dtype = self.dtype + # do_classifier_free_guidance 
= guidance_scale >= 0 and \ + # getattr(self.model, 'guidance_cond_proj_dim', None) is None + # dual_guidance = dual_guidance_scale >= 0 and dual_guidance - image, mask = self.prepare_image(image) - cond = self.encode_cond(image=image, - mask=mask, - do_classifier_free_guidance=do_classifier_free_guidance, - dual_guidance=dual_guidance) - batch_size = image.shape[0] + # image, mask = self.prepare_image(image) + # cond = self.encode_cond(image=image, + # mask=mask, + # do_classifier_free_guidance=do_classifier_free_guidance, + # dual_guidance=dual_guidance) + # batch_size = image.shape[0] - t_dtype = torch.long - timesteps, num_inference_steps = retrieve_timesteps( - self.scheduler, num_inference_steps, device, timesteps, sigmas) + # t_dtype = torch.long + # timesteps, num_inference_steps = retrieve_timesteps( + # self.scheduler, num_inference_steps, device, timesteps, sigmas) - latents = self.prepare_latents(batch_size, dtype, device, generator) - extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + # latents = self.prepare_latents(batch_size, dtype, device, generator) + # extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - guidance_cond = None - if getattr(self.model, 'guidance_cond_proj_dim', None) is not None: - print('Using lcm guidance scale') - guidance_scale_tensor = torch.tensor(guidance_scale - 1).repeat(batch_size) - guidance_cond = self.get_guidance_scale_embedding( - guidance_scale_tensor, embedding_dim=self.model.guidance_cond_proj_dim - ).to(device=device, dtype=latents.dtype) + # guidance_cond = None + # if getattr(self.model, 'guidance_cond_proj_dim', None) is not None: + # print('Using lcm guidance scale') + # guidance_scale_tensor = torch.tensor(guidance_scale - 1).repeat(batch_size) + # guidance_cond = self.get_guidance_scale_embedding( + # guidance_scale_tensor, embedding_dim=self.model.guidance_cond_proj_dim + # ).to(device=device, dtype=latents.dtype) - comfy_pbar = ProgressBar(num_inference_steps) + # comfy_pbar = ProgressBar(num_inference_steps) - self.model.to(device) - for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:", leave=False)): - # expand the latents if we are doing classifier free guidance - if do_classifier_free_guidance: - latent_model_input = torch.cat([latents] * (3 if dual_guidance else 2)) - else: - latent_model_input = latents - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + # self.model.to(device) + # for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:", leave=False)): + # # expand the latents if we are doing classifier free guidance + # if do_classifier_free_guidance: + # latent_model_input = torch.cat([latents] * (3 if dual_guidance else 2)) + # else: + # latent_model_input = latents + # latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - # predict the noise residual - timestep_tensor = torch.tensor([t], dtype=t_dtype, device=device) - timestep_tensor = timestep_tensor.expand(latent_model_input.shape[0]) - noise_pred = self.model(latent_model_input, timestep_tensor, cond, guidance_cond=guidance_cond) + # # predict the noise residual + # timestep_tensor = torch.tensor([t], dtype=t_dtype, device=device) + # timestep_tensor = timestep_tensor.expand(latent_model_input.shape[0]) + # noise_pred = self.model(latent_model_input, timestep_tensor, cond, guidance_cond=guidance_cond) - # no drop, drop clip, all drop - if do_classifier_free_guidance: - if dual_guidance: - noise_pred_clip, 
noise_pred_dino, noise_pred_uncond = noise_pred.chunk(3) - noise_pred = ( - noise_pred_uncond - + guidance_scale * (noise_pred_clip - noise_pred_dino) - + dual_guidance_scale * (noise_pred_dino - noise_pred_uncond) - ) - else: - noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond) + # # no drop, drop clip, all drop + # if do_classifier_free_guidance: + # if dual_guidance: + # noise_pred_clip, noise_pred_dino, noise_pred_uncond = noise_pred.chunk(3) + # noise_pred = ( + # noise_pred_uncond + # + guidance_scale * (noise_pred_clip - noise_pred_dino) + # + dual_guidance_scale * (noise_pred_dino - noise_pred_uncond) + # ) + # else: + # noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2) + # noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond) - # compute the previous noisy sample x_t -> x_t-1 - outputs = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs) - latents = outputs.prev_sample + # # compute the previous noisy sample x_t -> x_t-1 + # outputs = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs) + # latents = outputs.prev_sample - comfy_pbar.update(1) + # comfy_pbar.update(1) - if callback is not None and i % callback_steps == 0: - step_idx = i // getattr(self.scheduler, "order", 1) - callback(step_idx, t, outputs) - self.model.to(self.offload_device) - mm.soft_empty_cache() + # if callback is not None and i % callback_steps == 0: + # step_idx = i // getattr(self.scheduler, "order", 1) + # callback(step_idx, t, outputs) + # self.model.to(self.offload_device) + # mm.soft_empty_cache() - return self._export( - latents, - output_type, - box_v, mc_level, num_chunks, octree_resolution, mc_algo, - ) + # return self._export( + # latents, + # output_type, + # box_v, mc_level, num_chunks, octree_resolution, mc_algo, + # ) - def _export(self, latents, output_type, box_v, mc_level, num_chunks, octree_resolution, mc_algo): - if not output_type == "latent": - self.vae.to(self.main_device) - latents = 1. / self.vae.scale_factor * latents - latents = self.vae(latents) - outputs = self.vae.latents2mesh( - latents, - bounds=box_v, - mc_level=mc_level, - num_chunks=num_chunks, - octree_resolution=octree_resolution, - mc_algo=mc_algo, - ) - self.vae.to(self.offload_device) - else: - outputs = latents + # def _export(self, latents, output_type, box_v, mc_level, num_chunks, octree_resolution, mc_algo): + # if not output_type == "latent": + # self.vae.to(self.main_device) + # latents = 1. 
/ self.vae.scale_factor * latents + # latents = self.vae(latents) + # outputs = self.vae.latents2mesh( + # latents, + # bounds=box_v, + # mc_level=mc_level, + # num_chunks=num_chunks, + # octree_resolution=octree_resolution, + # mc_algo=mc_algo, + # ) + # self.vae.to(self.offload_device) + # else: + # outputs = latents - if output_type == 'trimesh': - outputs = export_to_trimesh(outputs) + # if output_type == 'trimesh': + # outputs = export_to_trimesh(outputs) - return outputs + # return outputs class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): @@ -555,15 +556,15 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): num_inference_steps: int = 50, timesteps: List[int] = None, sigmas: List[float] = None, - eta: float = 0.0, + #eta: float = 0.0, guidance_scale: float = 7.5, generator=None, - box_v=1.01, - octree_resolution=384, - mc_level=0.0, - mc_algo='mc', - num_chunks=8000, - output_type: Optional[str] = "trimesh", + # box_v=1.01, + # octree_resolution=384, + # mc_level=0.0, + # mc_algo='mc', + # num_chunks=8000, + # output_type: Optional[str] = "trimesh", enable_pbar=True, **kwargs, ) -> List[List[trimesh.Trimesh]]: @@ -628,9 +629,10 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): step_idx = i // getattr(self.scheduler, "order", 1) callback(step_idx, t, outputs) comfy_pbar.update(1) - - return self._export( - latents, - output_type, - box_v, mc_level, num_chunks, octree_resolution, mc_algo, - ) + print("latents shape: ", latents.shape) + return latents + # return self._export( + # latents, + # output_type, + # box_v, mc_level, num_chunks, octree_resolution, mc_algo, + # ) diff --git a/nodes.py b/nodes.py index ce4abbb..3d726d9 100644 --- a/nodes.py +++ b/nodes.py @@ -77,8 +77,8 @@ class Hy3DModelLoader: } } - RETURN_TYPES = ("HY3DMODEL",) - RETURN_NAMES = ("pipeline", ) + RETURN_TYPES = ("HY3DMODEL", "HY3DVAE") + RETURN_NAMES = ("pipeline", "vae") FUNCTION = "loadmodel" CATEGORY = "Hunyuan3DWrapper" @@ -88,7 +88,7 @@ class Hy3DModelLoader: config_path = os.path.join(script_directory, "configs", "dit_config.yaml") model_path = folder_paths.get_full_path("diffusion_models", model) - pipe = Hunyuan3DDiTFlowMatchingPipeline.from_single_file( + pipe, vae = Hunyuan3DDiTFlowMatchingPipeline.from_single_file( ckpt_path=model_path, config_path=config_path, use_safetensors=True, @@ -97,7 +97,7 @@ class Hy3DModelLoader: compile_args=compile_args, attention_mode=attention_mode) - return (pipe,) + return (pipe, vae,) class DownloadAndLoadHy3DDelightModel: @classmethod @@ -676,7 +676,6 @@ class Hy3DGenerateMesh: "required": { "pipeline": ("HY3DMODEL",), "image": ("IMAGE", ), - "octree_resolution": ("INT", {"default": 256, "min": 64, "max": 4096, "step": 16}), "guidance_scale": ("FLOAT", {"default": 5.5, "min": 0.0, "max": 100.0, "step": 0.01}), "steps": ("INT", {"default": 30, "min": 1}), "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), @@ -686,12 +685,12 @@ class Hy3DGenerateMesh: } } - RETURN_TYPES = ("HY3DMESH",) - RETURN_NAMES = ("mesh",) + RETURN_TYPES = ("HY3DLATENT",) + RETURN_NAMES = ("latents",) FUNCTION = "process" CATEGORY = "Hunyuan3DWrapper" - def process(self, pipeline, image, steps, guidance_scale, octree_resolution, seed, mask=None): + def process(self, pipeline, image, steps, guidance_scale, seed, mask=None): device = mm.get_torch_device() offload_device = mm.unet_offload_device() @@ -709,16 +708,12 @@ class Hy3DGenerateMesh: except: pass - mesh = pipeline( + latents = pipeline( image=image, mask=mask, 
num_inference_steps=steps, - mc_algo='mc', guidance_scale=guidance_scale, - octree_resolution=octree_resolution, - generator=torch.manual_seed(seed))[0] - - log.info(f"Generated mesh with {mesh.vertices.shape[0]} vertices and {mesh.faces.shape[0]} faces") + generator=torch.manual_seed(seed)) print_memory(device) try: @@ -728,7 +723,51 @@ class Hy3DGenerateMesh: pipeline.to(offload_device) - return (mesh, ) + return (latents, ) + +class Hy3DVAEDecode: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "vae": ("HY3DVAE",), + "latents": ("HY3DLATENT", ), + "box_v": ("FLOAT", {"default": 1.01, "min": -10.0, "max": 10.0, "step": 0.001}), + "octree_resolution": ("INT", {"default": 384, "min": 64, "max": 4096, "step": 16}), + "num_chunks": ("INT", {"default": 8000, "min": 1, "max": 10000000, "step": 1}), + "mc_level": ("FLOAT", {"default": 0, "min": -1.0, "max": 1.0, "step": 0.0001}), + "mc_algo": (["mc", "dmc"], {"default": "mc"}), + }, + } + + RETURN_TYPES = ("HY3DMESH",) + RETURN_NAMES = ("mesh",) + FUNCTION = "process" + CATEGORY = "Hunyuan3DWrapper" + + def process(self, vae, latents, box_v, octree_resolution, mc_level, num_chunks, mc_algo): + device = mm.get_torch_device() + offload_device = mm.unet_offload_device() + + vae.to(device) + latents = 1. / vae.scale_factor * latents + latents = vae(latents) + + outputs = vae.latents2mesh( + latents, + bounds=box_v, + mc_level=mc_level, + num_chunks=num_chunks, + octree_resolution=octree_resolution, + mc_algo=mc_algo, + )[0] + vae.to(offload_device) + + outputs.mesh_f = outputs.mesh_f[:, ::-1] + mesh_output = trimesh.Trimesh(outputs.mesh_v, outputs.mesh_f) + log.info(f"Decoded mesh with {mesh_output.vertices.shape[0]} vertices and {mesh_output.faces.shape[0]} faces") + + return (mesh_output, ) class Hy3DPostprocessMesh: @classmethod @@ -918,7 +957,8 @@ NODE_CLASS_MAPPINGS = { "Hy3DRenderMultiViewDepth": Hy3DRenderMultiViewDepth, "Hy3DGetMeshPBRTextures": Hy3DGetMeshPBRTextures, "Hy3DSetMeshPBRTextures": Hy3DSetMeshPBRTextures, - "Hy3DSetMeshPBRAttributes": Hy3DSetMeshPBRAttributes + "Hy3DSetMeshPBRAttributes": Hy3DSetMeshPBRAttributes, + "Hy3DVAEDecode": Hy3DVAEDecode } NODE_DISPLAY_NAME_MAPPINGS = { "Hy3DModelLoader": "Hy3DModelLoader", @@ -941,5 +981,6 @@ NODE_DISPLAY_NAME_MAPPINGS = { "Hy3DRenderMultiViewDepth": "Hy3D Render MultiView Depth", "Hy3DGetMeshPBRTextures": "Hy3D Get Mesh PBR Textures", "Hy3DSetMeshPBRTextures": "Hy3D Set Mesh PBR Textures", - "Hy3DSetMeshPBRAttributes": "Hy3D Set Mesh PBR Attributes" + "Hy3DSetMeshPBRAttributes": "Hy3D Set Mesh PBR Attributes", + "Hy3DVAEDecode": "Hy3D VAE Decode" }
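
For readers skimming the patch: the net change is that mesh generation is split into two nodes, with Hy3DModelLoader now returning a (pipeline, vae) pair, Hy3DGenerateMesh emitting HY3DLATENT, and the new Hy3DVAEDecode turning latents into a mesh. A minimal sketch of that flow, assuming `pipeline` and `vae` come from the updated Hy3DModelLoader and that `image` and `mask` are ComfyUI IMAGE/MASK tensors prepared upstream; parameter values mirror the defaults and the example workflow in this diff, and everything else is illustrative rather than part of the patch:

    # illustrative only: chains the node methods changed/added in nodes.py above
    gen = Hy3DGenerateMesh()
    (latents,) = gen.process(pipeline, image,
                             steps=50, guidance_scale=5.5, seed=123, mask=mask)

    dec = Hy3DVAEDecode()
    (mesh,) = dec.process(vae, latents,
                          box_v=1.01, octree_resolution=384,
                          mc_level=0.0, num_chunks=8000, mc_algo="mc")
    # mesh is a trimesh.Trimesh, built at the end of Hy3DVAEDecode.process

In the ComfyUI graph this corresponds to wiring the loader's new "vae" output into Hy3DVAEDecode and feeding it the HY3DLATENT from Hy3DGenerateMesh, as done in the renamed hy3d_example_01.json workflow.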