Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,12 +1,13 @@
 import os
 import random
 import tempfile
-from typing import Any, List
+from typing import Any, List, Union
 
 import spaces
 import gradio as gr
 import numpy as np
 import torch
+# from gradio_image_prompter import ImagePrompter
 from gradio_litmodel3d import LitModel3D
 from huggingface_hub import snapshot_download
 from PIL import Image
@@ -21,52 +22,113 @@ MAX_SEED = np.iinfo(np.int32).max
 TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp")
 DTYPE = torch.bfloat16
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-REPO_ID = "VAST-AI/DetailGen3D"
+REPO_ID = "VAST-AI/DetailGen3D"  # doesn't seem to exist yet
 
 MARKDOWN = """
 ## Generating geometry details guided by reference image with [DetailGen3D](https://detailgen3d.github.io/DetailGen3D/)
-1. Upload a detailed image of the frontal view and a coarse model. Then
-2. If
-3.
+1. Upload a detailed image of the frontal view and a coarse model. Then click "Run" to generate the refined result.
+2. If you find the generated 3D scene satisfactory, download it by clicking the "Download GLB" button.
+3. If you want the refined result to be more consistent with the image, manually increase the CFG strength.
 """
 EXAMPLES = [
-    [
-        "assets/image/100.png",
-        "assets/model/100.glb",
-        42,
-        False
-    ]
+    ["assets/image/100.png", "assets/model/100.glb", 42, False],
 ]
+# EXAMPLES = [
+#     [
+#         # {
+#         #     "image": "assets/image/100.png",
+#         # },
+#         "assets/image/100.png",
+#         "assets/model/100.glb",
+#         42,
+#         False,
+#     ],
+#     [
+#         {
+#             "image": "assets/image/503d193a-1b9b-4685-b05f-00ac82f93d7b.png",
+#         },
+#         "assets/image/503d193a-1b9b-4685-b05f-00ac82f93d7b.png",
+#         "assets/model/503d193a-1b9b-4685-b05f-00ac82f93d7b.glb",
+#         42,
+#         False,
+#     ],
+#     [
+#         {
+#             "image": "assets/image/34933195-9c2c-4271-8d31-a28bc5348b7a.png",
+#         },
+#         "assets/model/34933195-9c2c-4271-8d31-a28bc5348b7a.glb",
+#         42,
+#         False,
+#     ],
+#     [
+#         {
+#             "image": "assets/image/a5d09c66-1617-465c-aec9-431f48d9a7e1.png",
+#         },
+#         "assets/model/a5d09c66-1617-465c-aec9-431f48d9a7e1.glb",
+#         42,
+#         False,
+#     ],
+#     [
+#         {
+#             "image": "assets/image/cb7e6c4a-b4dd-483c-9789-3d4887ee7434.png",
+#         },
+#         "assets/model/cb7e6c4a-b4dd-483c-9789-3d4887ee7434.glb",
+#         42,
+#         False,
+#     ],
+#     [
+#         {
+#             "image": "assets/image/e799e6b4-3b47-40e0-befb-b156af8758ad.png",
+#         },
+#         "assets/model/instant3d/e799e6b4-3b47-40e0-befb-b156af8758ad.glb",
+#         42,
+#         False,
+#     ],
+# ]
 
 os.makedirs(TMP_DIR, exist_ok=True)
+
 local_dir = "pretrained_weights/DetailGen3D"
 snapshot_download(repo_id=REPO_ID, local_dir=local_dir)
-pipeline = DetailGen3DPipeline.from_pretrained(
+pipeline = DetailGen3DPipeline.from_pretrained(
+    local_dir
+).to(DEVICE, dtype=DTYPE)
+
 
 def load_mesh(mesh_path, num_pc=20480):
-    mesh = trimesh.load(mesh_path,
+    mesh = trimesh.load(mesh_path, force="mesh")
+
     center = mesh.bounding_box.centroid
     mesh.apply_translation(-center)
     scale = max(mesh.bounding_box.extents)
     mesh.apply_scale(1.9 / scale)
 
     surface, face_indices = trimesh.sample.sample_surface(mesh, 1000000)
     normal = mesh.face_normals[face_indices]
 
     rng = np.random.default_rng()
     ind = rng.choice(surface.shape[0], num_pc, replace=False)
     surface = torch.FloatTensor(surface[ind])
     normal = torch.FloatTensor(normal[ind])
+    surface = torch.cat([surface, normal], dim=-1).unsqueeze(0).cuda()
+
+    return surface
 
 @torch.no_grad()
 @torch.autocast(device_type=DEVICE)
-def run_detailgen3d(pipeline, image, mesh, seed, num_inference_steps, guidance_scale):
+def run_detailgen3d(
+    pipeline,
+    image,
+    mesh,
+    seed,
+    num_inference_steps,
+    guidance_scale,
+):
     surface = load_mesh(mesh)
+
     batch_size = 1
 
-    #
+    # sample query points for decoding
     box_min = np.array([-1.005, -1.005, -1.005])
     box_max = np.array([1.005, 1.005, 1.005])
     sampled_points, grid_size, bbox_size = generate_dense_grid_points(
@@ -75,27 +137,25 @@ def run_detailgen3d(pipeline, image, mesh, seed, num_inference_steps, guidance_s
     sampled_points = torch.FloatTensor(sampled_points).to(DEVICE, dtype=DTYPE)
     sampled_points = sampled_points.unsqueeze(0).repeat(batch_size, 1, 1)
 
-    #
+    # inference pipeline
     sample = pipeline.vae.encode(surface).latent_dist.sample()
-    occ = pipeline(
-        sampled_points=sampled_points,
-        guidance_scale=guidance_scale,
-        noise_aug_level=0,
-        num_inference_steps=num_inference_steps
-    ).samples[0]
-
-    # Mesh processing
+    occ = pipeline(image, latents=sample, sampled_points=sampled_points, guidance_scale=guidance_scale, noise_aug_level=0, num_inference_steps=num_inference_steps).samples[0]
+
+    # marching cubes
     grid_logits = occ.view(grid_size).cpu().numpy()
-    vertices, faces, normals, _ = measure.marching_cubes(
+    vertices, faces, normals, _ = measure.marching_cubes(
+        grid_logits, 0, method="lewiner"
+    )
     vertices = vertices / grid_size * bbox_size + box_min
+    mesh = trimesh.Trimesh(vertices.astype(np.float32), np.ascontiguousarray(faces))
+    return mesh
 
 @spaces.GPU(duration=180)
+@torch.no_grad()
+@torch.autocast(device_type=DEVICE)
 def run_refinement(
+    rgb_image: Any,
+    mesh: Any,
     seed: int,
     randomize_seed: bool = False,
     num_inference_steps: int = 50,
@@ -103,87 +163,92 @@ def run_refinement(
 ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
 
-    try:
-        # Validate inputs
-        if not os.path.exists(image_path):
-            raise ValueError(f"Image path {image_path} not found")
-        if not os.path.exists(mesh_path):
-            raise ValueError(f"Mesh path {mesh_path} not found")
-
-        image = Image.open(image_path).convert("RGB")
-        scene = run_detailgen3d(
-            pipeline,
-            image,
-            mesh_path,
-            seed,
-            num_inference_steps,
-            guidance_scale,
-        )
-    finally:
-        torch.cuda.empty_cache()
+    # print("rgb_image", rgb_image)
+    # print("mesh", mesh)
+
+    # if not isinstance(rgb_image, Image.Image) and "image" in rgb_image:
+    #     rgb_image = Image.open(rgb_image["image"]).convert("RGB")
 
+    rgb_image = Image.open(rgb_image).convert("RGB")
+
+    scene = run_detailgen3d(
+        pipeline,
+        rgb_image,
+        mesh,
+        seed,
+        num_inference_steps,
+        guidance_scale,
+    )
+
+    _, tmp_path = tempfile.mkstemp(suffix=".glb", prefix="detailgen3d_", dir=TMP_DIR)
+    scene.export(tmp_path)
+
+    torch.cuda.empty_cache()
+
+    return tmp_path, tmp_path, seed
+
+# Demo
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN)
 
     with gr.Row():
         with gr.Column():
             with gr.Row():
+                # image_prompts = ImagePrompter(label="Input Image", type="pil")
+                image_prompts = gr.Image(label="Example Image", type="pil")
+                mesh = gr.Model3D(label="Input Coarse Model", camera_position=(90, 90, 3))
+
+            with gr.Accordion("Generation Settings", open=False):
+                seed = gr.Slider(
+                    label="Seed",
+                    minimum=0,
+                    maximum=MAX_SEED,
+                    step=1,
+                    value=0,
                 )
+                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+                num_inference_steps = gr.Slider(
+                    label="Number of inference steps",
+                    minimum=1,
+                    maximum=50,
+                    step=1,
+                    value=50,
                 )
+                guidance_scale = gr.Slider(
+                    label="CFG scale",
+                    minimum=0.0,
+                    maximum=50.0,
+                    step=0.1,
+                    value=4.0,
+                )
+            gen_button = gr.Button("Generate details", variant="primary")
+
         with gr.Column():
-            model_output = LitModel3D(
-                height=500,
-                camera_position=(90, 90, 3)
-            )
-            download_btn = gr.DownloadButton(
-                "Download GLB",
-                interactive=False
-            )
-
-    # Examples section
-    gr.Examples(
-        examples=EXAMPLES,
-        inputs=[image_input, mesh_input, seed_input, randomize_seed],
-        outputs=[model_output, download_btn, seed_input],
-        fn=run_refinement,
-        cache_examples=False,
-        label="Example Inputs"
-    )
+            model_output = LitModel3D(label="Generated GLB", exposure=1.0, height=500, camera_position=(90, 90, 3))
+            download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
 
+    with gr.Row():
+        gr.Examples(
+            examples=EXAMPLES,
+            fn=run_refinement,
+            inputs=[image_prompts, mesh, seed, randomize_seed],
+            outputs=[model_output, download_glb, seed],
+            cache_examples=False,
+        )
+
+    gen_button.click(
         run_refinement,
-    inputs=[
+        inputs=[
+            image_prompts,
+            mesh,
+            seed,
+            randomize_seed,
+            num_inference_steps,
+            guidance_scale,
+        ],
+        outputs=[model_output, download_glb, seed],
+    ).then(lambda: gr.Button(interactive=True), outputs=[download_glb])
 
 demo.launch()
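
Note: generate_dense_grid_points is imported from the DetailGen3D codebase and does not appear in this diff. Its call site pins down the contract, though: it must return flat query coordinates covering the [-1.005, 1.005]^3 box (the app scales input meshes to a max extent of 1.9, so the 2.01-wide box leaves a small margin), the per-axis grid shape used for occ.view(grid_size), and the box extent used to rescale marching-cubes vertices. A hypothetical sketch consistent with that call site — the resolution parameter and all internals here are assumptions, not the real implementation:

import numpy as np

def generate_dense_grid_points(bbox_min, bbox_max, resolution=256):
    # Hypothetical stand-in for the DetailGen3D helper of the same name.
    # Returns (points, grid_size, bbox_size) as the call site in app.py expects:
    #   points    -- (N, 3) flat array of query coordinates spanning the box
    #   grid_size -- per-axis sample counts, usable for occ.view(grid_size)
    #   bbox_size -- bbox_max - bbox_min, used to map vertices back to world space
    grid_size = [resolution + 1, resolution + 1, resolution + 1]
    axes = [np.linspace(bbox_min[i], bbox_max[i], grid_size[i]) for i in range(3)]
    xx, yy, zz = np.meshgrid(*axes, indexing="ij")
    points = np.stack([xx, yy, zz], axis=-1).reshape(-1, 3).astype(np.float32)
    return points, grid_size, np.asarray(bbox_max) - np.asarray(bbox_min)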
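
The decode path at the end of run_detailgen3d (view the occupancy as a dense grid, run marching_cubes at level 0, rescale into world space, wrap in trimesh.Trimesh) can be sanity-checked without the GPU pipeline by substituting a synthetic signed field for the model's logits, e.g. with the sketch above:

import numpy as np
import trimesh
from skimage import measure

box_min = np.array([-1.005, -1.005, -1.005])
box_max = np.array([1.005, 1.005, 1.005])
points, grid_size, bbox_size = generate_dense_grid_points(box_min, box_max, resolution=64)

# A sphere of radius 0.8: positive inside, negative outside, like occupancy logits.
logits = 0.8 - np.linalg.norm(points, axis=-1)
grid_logits = logits.reshape(grid_size)

vertices, faces, _, _ = measure.marching_cubes(grid_logits, 0, method="lewiner")
vertices = vertices / grid_size * bbox_size + box_min
mesh = trimesh.Trimesh(vertices.astype(np.float32), np.ascontiguousarray(faces))
print(mesh.bounds)  # each axis should span roughly [-0.8, 0.8]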