Spaces:

mike23415
/

Rightlight

Runtime error

App Files Files Community

mike23415 committed 10 days ago

Commit

06be9c8

verified ·

1 Parent(s): ac27738

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -44

app.py CHANGED Viewed

@@ -4,32 +4,30 @@ import gradio as gr
 import numpy as np
 from PIL import Image
 import tempfile
-from transformers import AutoImageProcessor, AutoModel
 from tqdm.auto import tqdm
 # Check if CUDA is available, otherwise use CPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
-# Initialize the model
 print("Loading Shap-E model...")
-model_id = "openai/shap-e-img2img"
-processor = AutoImageProcessor.from_pretrained(model_id)
-model = AutoModel.from_pretrained(model_id).to(device)
 def preprocess_image(image):
-    # Resize and center crop to 256x256
-    width, height = image.size
-    size = min(width, height)
-    left = (width - size) // 2
-    top = (height - size) // 2
-    right = left + size
-    bottom = top + size
-    image = image.crop((left, top, right, bottom))
     image = image.resize((256, 256))
     return image
-def generate_3d_mesh(image, guidance_scale=15.0, num_inference_steps=64):
     """
     Convert a single image to a 3D model using Shap-E
     """
@@ -38,42 +36,62 @@ def generate_3d_mesh(image, guidance_scale=15.0, num_inference_steps=64):
     try:
         # Preprocess image
-        image = preprocess_image(image)
-        # Process image
-        inputs = processor(images=image, return_tensors="pt").to(device)
         # Generate latents
-        with torch.no_grad():
-            latents = model.encode(inputs["pixel_values"]).latents
-        # Decode the latents
-        with torch.no_grad():
-            with tqdm(total=num_inference_steps) as progress_bar:
-                def callback(i, t, latents):
-                    progress_bar.update(1)
-                sample = model.decode(
-                    latents,
-                    guidance_scale=guidance_scale,
-                    num_inference_steps=num_inference_steps,
-                    callback=callback
-                )
-        # Get mesh
-        obj_mesh = sample.get_mesh()
-        glb_mesh = sample.get_glb()
-        # Save mesh to files
         with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as obj_file:
             obj_path = obj_file.name
-            obj_mesh.write_obj(obj_path)
-        with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as glb_file:
-            glb_path = glb_file.name
-            glb_file.write(glb_mesh)
-        return [obj_path, glb_path], "3D model generated successfully!"
     except Exception as e:
         return None, f"Error: {str(e)}"
@@ -82,7 +100,7 @@ def process_image(image, guidance_scale, num_steps):
         if image is None:
             return None, None, "Please upload an image first."
-        results, message = generate_3d_mesh(
             image,
             guidance_scale=guidance_scale,
             num_inference_steps=num_steps
@@ -109,13 +127,13 @@ with gr.Blocks(title="Image to 3D Model Converter") as demo:
         with gr.Column(scale=1):
             obj_file = gr.File(label="OBJ File (for editing)")
-            glb_file = gr.File(label="GLB File (for Unity)")
             output_message = gr.Textbox(label="Output Message")
     submit_btn.click(
         fn=process_image,
         inputs=[input_image, guidance, num_steps],
-        outputs=[obj_file, glb_file, output_message]
     )
 # Launch the app

 import numpy as np
 from PIL import Image
 import tempfile
 from tqdm.auto import tqdm
 # Check if CUDA is available, otherwise use CPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
+# Import Shap-E related modules after installing them
+print("Loading necessary modules...")
+from shap_e.diffusion.sample import sample_latents
+from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
+from shap_e.models.download import load_model, load_config
+from shap_e.util.image_util import load_image
+from shap_e.util.notebooks import create_pan_cameras, decode_latent_mesh
 print("Loading Shap-E model...")
+xm = load_model('transmitter', device=device)
+diffusion = diffusion_from_config(load_config('diffusion'))
 def preprocess_image(image):
+    # Resize directly to 256x256 (no center crop, so the aspect ratio is not preserved)
     image = image.resize((256, 256))
     return image
+def image_to_3d(image, guidance_scale=15.0, num_inference_steps=64):
     """
     Convert a single image to a 3D model using Shap-E
     """
     try:
         # Preprocess image
+        processed_image = preprocess_image(image)
+        # Convert PIL image to Shap-E format
+        shap_e_image = load_image(processed_image)
         # Generate latents
+        latents = sample_latents(
+            batch_size=1,
+            model=xm,
+            diffusion=diffusion,
+            guidance_scale=guidance_scale,
+            model_kwargs=dict(images=[shap_e_image]),
+            progress=True,
+            clip_denoised=True,
+            use_fp16=device.type == 'cuda',
+            use_karras=True,
+            karras_steps=num_inference_steps,
+            sigma_min=1e-3,
+            sigma_max=160,
+            s_churn=0,
+        )
+        # Create mesh
+        render_mode = 'nerf' # you can also use 'stf' for faster rendering
+        mesh = decode_latent_mesh(xm, latents[0], render_mode).tri_mesh()
+        # Save mesh to OBJ file
         with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as obj_file:
             obj_path = obj_file.name
+            with open(obj_path, 'w') as f:
+                for v in mesh.verts:
+                    f.write(f'v {v[0]} {v[1]} {v[2]}\n')
+                for face in mesh.faces:
+                    f.write(f'f {face[0]+1} {face[1]+1} {face[2]+1}\n')
+        # Save mesh to PLY file for better Unity compatibility
+        with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as ply_file:
+            ply_path = ply_file.name
+            with open(ply_path, 'w') as f:
+                f.write('ply\n')
+                f.write('format ascii 1.0\n')
+                f.write(f'element vertex {len(mesh.verts)}\n')
+                f.write('property float x\n')
+                f.write('property float y\n')
+                f.write('property float z\n')
+                f.write(f'element face {len(mesh.faces)}\n')
+                f.write('property list uchar int vertex_indices\n')
+                f.write('end_header\n')
+                for v in mesh.verts:
+                    f.write(f'{v[0]} {v[1]} {v[2]}\n')
+                for face in mesh.faces:
+                    f.write(f'3 {face[0]} {face[1]} {face[2]}\n')
+        return [obj_path, ply_path], "3D model generated successfully!"
     except Exception as e:
         return None, f"Error: {str(e)}"
         if image is None:
             return None, None, "Please upload an image first."
+        results, message = image_to_3d(
             image,
             guidance_scale=guidance_scale,
             num_inference_steps=num_steps
         with gr.Column(scale=1):
             obj_file = gr.File(label="OBJ File (for editing)")
+            ply_file = gr.File(label="PLY File (for Unity)")
             output_message = gr.Textbox(label="Output Message")
     submit_btn.click(
         fn=process_image,
         inputs=[input_image, guidance, num_steps],
+        outputs=[obj_file, ply_file, output_message]
     )
 # Launch the app