mike23415 committed on
Commit 829dfd4 · verified · 1 Parent(s): 222e07d

Update app.py

Files changed (1): app.py +80 -81
app.py CHANGED
@@ -1,98 +1,97 @@
 import os
-import gradio as gr
 import torch
+import gradio as gr
 import numpy as np
 from PIL import Image
-import trimesh
-from diffusers import Zero123Pipeline
 import tempfile
+from transformers import AutoImageProcessor, AutoModel
+from tqdm.auto import tqdm
 
 # Check if CUDA is available, otherwise use CPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 
-# Initialize the pipeline
-pipe = Zero123Pipeline.from_pretrained(
-    "bennyguo/zero123-xl-diffusers",
-    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
-).to(device)
+# Initialize the model
+print("Loading Shap-E model...")
+model_id = "openai/shap-e-img2img"
+processor = AutoImageProcessor.from_pretrained(model_id)
+model = AutoModel.from_pretrained(model_id).to(device)
+
+def preprocess_image(image):
+    # Resize and center crop to 256x256
+    width, height = image.size
+    size = min(width, height)
+    left = (width - size) // 2
+    top = (height - size) // 2
+    right = left + size
+    bottom = top + size
+    image = image.crop((left, top, right, bottom))
+    image = image.resize((256, 256))
+    return image
 
-def image_to_3d(input_image, num_inference_steps=75, guidance_scale=3.0):
+def generate_3d_mesh(image, guidance_scale=15.0, num_inference_steps=64):
     """
-    Convert a single image to a 3D model
+    Convert a single image to a 3D model using Shap-E
     """
-    # Preprocess image
-    if input_image is None:
-        return None
-
-    input_image = input_image.convert("RGB").resize((256, 256))
-
-    # Generate multiple views using Zero123
-    images = []
-
-    # Generate views from different angles
-    for elevation in [0, 30]:
-        for azimuth in [0, 90, 180, 270]:
-            print(f"Generating view: elevation={elevation}, azimuth={azimuth}")
-            with torch.no_grad():
-                image = pipe(
-                    image=input_image,
-                    elevation=elevation,
-                    azimuth=azimuth,
-                    num_inference_steps=num_inference_steps,
-                    guidance_scale=guidance_scale,
-                ).images[0]
-                images.append(np.array(image))
+    if image is None:
+        return None, "No image provided"
 
-    # Create point cloud from multiple views
-    # This is a simplified approach - in production you might want to use a more sophisticated method
-    points = []
-    for i, img in enumerate(images):
-        # Extract depth information (simplified approach)
-        gray = np.mean(img, axis=2)
-        # Sample points from the image
-        h, w = gray.shape
-        for y in range(0, h, 4):
-            for x in range(0, w, 4):
-                depth = gray[y, x] / 255.0  # Normalize depth
-
-                # Convert to 3D point based on view angle
-                angle_idx = i % 4
-                elevation = 0 if i < 4 else 30
-                azimuth = angle_idx * 90
-
-                # Convert to radians
-                elevation_rad = elevation * np.pi / 180
-                azimuth_rad = azimuth * np.pi / 180
-
-                # Calculate 3D position based on spherical coordinates
-                z = depth * np.cos(elevation_rad) * np.cos(azimuth_rad)
-                x = depth * np.cos(elevation_rad) * np.sin(azimuth_rad)
-                y = depth * np.sin(elevation_rad)
+    try:
+        # Preprocess image
+        image = preprocess_image(image)
+
+        # Process image
+        inputs = processor(images=image, return_tensors="pt").to(device)
+
+        # Generate latents
+        with torch.no_grad():
+            latents = model.encode(inputs["pixel_values"]).latents
+
+        # Decode the latents
+        with torch.no_grad():
+            with tqdm(total=num_inference_steps) as progress_bar:
+                def callback(i, t, latents):
+                    progress_bar.update(1)
 
-                points.append([x, y, z])
-
-    # Create a point cloud
-    point_cloud = np.array(points)
-
-    # Save point cloud to OBJ file
-    with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as tmp_file:
-        mesh = trimesh.points.PointCloud(point_cloud)
-        mesh.export(tmp_file.name)
+                sample = model.decode(
+                    latents,
+                    guidance_scale=guidance_scale,
+                    num_inference_steps=num_inference_steps,
+                    callback=callback
+                )
+
+        # Get mesh
+        obj_mesh = sample.get_mesh()
+        glb_mesh = sample.get_glb()
+
+        # Save mesh to files
+        with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as obj_file:
+            obj_path = obj_file.name
+            obj_mesh.write_obj(obj_path)
 
-        # Also export as PLY for better compatibility
-        ply_path = tmp_file.name.replace('.obj', '.ply')
-        mesh.export(ply_path)
+        with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as glb_file:
+            glb_path = glb_file.name
+            glb_file.write(glb_mesh)
 
-        return [tmp_file.name, ply_path]
+        return [obj_path, glb_path], "3D model generated successfully!"
+    except Exception as e:
+        return None, f"Error: {str(e)}"
 
-def process_image(image, num_steps, guidance):
+def process_image(image, guidance_scale, num_steps):
     try:
-        model_paths = image_to_3d(image, num_inference_steps=num_steps, guidance_scale=guidance)
-        if model_paths:
-            return model_paths[0], model_paths[1], "3D model generated successfully!"
+        if image is None:
+            return None, None, "Please upload an image first."
+
+        results, message = generate_3d_mesh(
+            image,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_steps
+        )
+
+        if results:
+            return results[0], results[1], message
         else:
-            return None, None, "Failed to process the image."
+            return None, None, message
     except Exception as e:
         return None, None, f"Error: {str(e)}"
 
@@ -104,19 +103,19 @@ with gr.Blocks(title="Image to 3D Model Converter") as demo:
     with gr.Row():
         with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Input Image")
-            num_steps = gr.Slider(minimum=20, maximum=100, value=75, step=5, label="Number of Inference Steps")
-            guidance = gr.Slider(minimum=1.0, maximum=7.0, value=3.0, step=0.5, label="Guidance Scale")
+            guidance = gr.Slider(minimum=5.0, maximum=20.0, value=15.0, step=0.5, label="Guidance Scale")
+            num_steps = gr.Slider(minimum=16, maximum=128, value=64, step=8, label="Number of Inference Steps")
             submit_btn = gr.Button("Convert to 3D")
 
         with gr.Column(scale=1):
-            obj_file = gr.File(label="OBJ File")
-            ply_file = gr.File(label="PLY File")
+            obj_file = gr.File(label="OBJ File (for editing)")
+            glb_file = gr.File(label="GLB File (for Unity)")
             output_message = gr.Textbox(label="Output Message")
 
     submit_btn.click(
         fn=process_image,
-        inputs=[input_image, num_steps, guidance],
-        outputs=[obj_file, ply_file, output_message]
+        inputs=[input_image, guidance, num_steps],
+        outputs=[obj_file, glb_file, output_message]
     )
 
 # Launch the app
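
Note on the new code path: the `openai/shap-e-img2img` checkpoint is published for the `diffusers` library's `ShapEImg2ImgPipeline`, and the `model.encode(...).latents` / `model.decode(...)` / `sample.get_mesh()` calls above are not a documented `transformers` `AutoModel` API. A minimal sketch of the diffusers route follows, assuming `diffusers` (>= 0.18, with Shap-E support) is installed; the input path and output filename are placeholders, not part of this commit:

import torch
from diffusers import ShapEImg2ImgPipeline
from diffusers.utils import export_to_ply, load_image

# Pick GPU when available; Shap-E also runs (slowly) on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the image-to-3D pipeline from the same checkpoint.
pipe = ShapEImg2ImgPipeline.from_pretrained(
    "openai/shap-e-img2img",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

image = load_image("input.png")  # placeholder input image

# output_type="mesh" returns mesh decoder output instead of rendered frames.
result = pipe(
    image,
    guidance_scale=3.0,
    num_inference_steps=64,
    frame_size=256,
    output_type="mesh",
)

# export_to_ply writes the first generated mesh to a PLY file.
ply_path = export_to_ply(result.images[0], "mesh.ply")
print(f"Saved mesh to {ply_path}")

A PLY exported this way can then be converted to OBJ or GLB with trimesh if those are the formats the Gradio file widgets should serve.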