mike23415 committed on
Commit 829dfd4 · verified · 1 Parent(s): 222e07d

Update app.py

Files changed (1): app.py +80 -81
app.py CHANGED
@@ -1,98 +1,97 @@
 import os
-import gradio as gr
 import torch
+import gradio as gr
 import numpy as np
 from PIL import Image
-import trimesh
-from diffusers import Zero123Pipeline
 import tempfile
+from transformers import AutoImageProcessor, AutoModel
+from tqdm.auto import tqdm
 
 # Check if CUDA is available, otherwise use CPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 
-# Initialize the pipeline
-pipe = Zero123Pipeline.from_pretrained(
-    "bennyguo/zero123-xl-diffusers",
-    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
-).to(device)
+# Initialize the model
+print("Loading Shap-E model...")
+model_id = "openai/shap-e-img2img"
+processor = AutoImageProcessor.from_pretrained(model_id)
+model = AutoModel.from_pretrained(model_id).to(device)
+
+def preprocess_image(image):
+    # Resize and center crop to 256x256
+    width, height = image.size
+    size = min(width, height)
+    left = (width - size) // 2
+    top = (height - size) // 2
+    right = left + size
+    bottom = top + size
+    image = image.crop((left, top, right, bottom))
+    image = image.resize((256, 256))
+    return image
 
-def image_to_3d(input_image, num_inference_steps=75, guidance_scale=3.0):
+def generate_3d_mesh(image, guidance_scale=15.0, num_inference_steps=64):
     """
-    Convert a single image to a 3D model
+    Convert a single image to a 3D model using Shap-E
     """
-    # Preprocess image
-    if input_image is None:
-        return None
-
-    input_image = input_image.convert("RGB").resize((256, 256))
-
-    # Generate multiple views using Zero123
-    images = []
-
-    # Generate views from different angles
-    for elevation in [0, 30]:
-        for azimuth in [0, 90, 180, 270]:
-            print(f"Generating view: elevation={elevation}, azimuth={azimuth}")
-            with torch.no_grad():
-                image = pipe(
-                    image=input_image,
-                    elevation=elevation,
-                    azimuth=azimuth,
-                    num_inference_steps=num_inference_steps,
-                    guidance_scale=guidance_scale,
-                ).images[0]
-                images.append(np.array(image))
+    if image is None:
+        return None, "No image provided"
 
-    # Create point cloud from multiple views
-    # This is a simplified approach - in production you might want to use a more sophisticated method
-    points = []
-    for i, img in enumerate(images):
-        # Extract depth information (simplified approach)
-        gray = np.mean(img, axis=2)
-        # Sample points from the image
-        h, w = gray.shape
-        for y in range(0, h, 4):
-            for x in range(0, w, 4):
-                depth = gray[y, x] / 255.0  # Normalize depth
-
-                # Convert to 3D point based on view angle
-                angle_idx = i % 4
-                elevation = 0 if i < 4 else 30
-                azimuth = angle_idx * 90
-
-                # Convert to radians
-                elevation_rad = elevation * np.pi / 180
-                azimuth_rad = azimuth * np.pi / 180
-
-                # Calculate 3D position based on spherical coordinates
-                z = depth * np.cos(elevation_rad) * np.cos(azimuth_rad)
-                x = depth * np.cos(elevation_rad) * np.sin(azimuth_rad)
-                y = depth * np.sin(elevation_rad)
+    try:
+        # Preprocess image
+        image = preprocess_image(image)
+
+        # Process image
+        inputs = processor(images=image, return_tensors="pt").to(device)
+
+        # Generate latents
+        with torch.no_grad():
+            latents = model.encode(inputs["pixel_values"]).latents
+
+        # Decode the latents
+        with torch.no_grad():
+            with tqdm(total=num_inference_steps) as progress_bar:
+                def callback(i, t, latents):
+                    progress_bar.update(1)
 
-                points.append([x, y, z])
-
-    # Create a point cloud
-    point_cloud = np.array(points)
-
-    # Save point cloud to OBJ file
-    with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as tmp_file:
-        mesh = trimesh.points.PointCloud(point_cloud)
-        mesh.export(tmp_file.name)
+                sample = model.decode(
+                    latents,
+                    guidance_scale=guidance_scale,
+                    num_inference_steps=num_inference_steps,
+                    callback=callback
+                )
+
+        # Get mesh
+        obj_mesh = sample.get_mesh()
+        glb_mesh = sample.get_glb()
+
+        # Save mesh to files
+        with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as obj_file:
+            obj_path = obj_file.name
+            obj_mesh.write_obj(obj_path)
 
-        # Also export as PLY for better compatibility
-        ply_path = tmp_file.name.replace('.obj', '.ply')
-        mesh.export(ply_path)
+        with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as glb_file:
+            glb_path = glb_file.name
+            glb_file.write(glb_mesh)
 
-        return [tmp_file.name, ply_path]
+        return [obj_path, glb_path], "3D model generated successfully!"
+    except Exception as e:
+        return None, f"Error: {str(e)}"
 
-def process_image(image, num_steps, guidance):
+def process_image(image, guidance_scale, num_steps):
     try:
-        model_paths = image_to_3d(image, num_inference_steps=num_steps, guidance_scale=guidance)
-        if model_paths:
-            return model_paths[0], model_paths[1], "3D model generated successfully!"
+        if image is None:
+            return None, None, "Please upload an image first."
+
+        results, message = generate_3d_mesh(
+            image,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_steps
+        )
+
+        if results:
+            return results[0], results[1], message
         else:
-            return None, None, "Failed to process the image."
+            return None, None, message
     except Exception as e:
         return None, None, f"Error: {str(e)}"
 
@@ -104,19 +103,19 @@ with gr.Blocks(title="Image to 3D Model Converter") as demo:
     with gr.Row():
         with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Input Image")
-            num_steps = gr.Slider(minimum=20, maximum=100, value=75, step=5, label="Number of Inference Steps")
-            guidance = gr.Slider(minimum=1.0, maximum=7.0, value=3.0, step=0.5, label="Guidance Scale")
+            guidance = gr.Slider(minimum=5.0, maximum=20.0, value=15.0, step=0.5, label="Guidance Scale")
+            num_steps = gr.Slider(minimum=16, maximum=128, value=64, step=8, label="Number of Inference Steps")
             submit_btn = gr.Button("Convert to 3D")
 
         with gr.Column(scale=1):
-            obj_file = gr.File(label="OBJ File")
-            ply_file = gr.File(label="PLY File")
+            obj_file = gr.File(label="OBJ File (for editing)")
+            glb_file = gr.File(label="GLB File (for Unity)")
             output_message = gr.Textbox(label="Output Message")
 
     submit_btn.click(
         fn=process_image,
-        inputs=[input_image, num_steps, guidance],
-        outputs=[obj_file, ply_file, output_message]
+        inputs=[input_image, guidance, num_steps],
+        outputs=[obj_file, glb_file, output_message]
     )
 
 # Launch the app
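
Note on the new code path: the `openai/shap-e-img2img` checkpoint is published for the `diffusers` library's `ShapEImg2ImgPipeline`, and the `model.encode(...).latents` / `model.decode(...)` / `sample.get_mesh()` calls above are not a documented `transformers` `AutoModel` API. A minimal sketch of the diffusers route follows, assuming `diffusers` (>= 0.18, with Shap-E support) is installed; the input path and output filename are placeholders, not part of this commit:

import torch
from diffusers import ShapEImg2ImgPipeline
from diffusers.utils import export_to_ply, load_image

# Pick GPU when available; Shap-E also runs (slowly) on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the image-to-3D pipeline from the same checkpoint.
pipe = ShapEImg2ImgPipeline.from_pretrained(
    "openai/shap-e-img2img",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

image = load_image("input.png")  # placeholder input image

# output_type="mesh" returns mesh decoder output instead of rendered frames.
result = pipe(
    image,
    guidance_scale=3.0,
    num_inference_steps=64,
    frame_size=256,
    output_type="mesh",
)

# export_to_ply writes the first generated mesh to a PLY file.
ply_path = export_to_ply(result.images[0], "mesh.ply")
print(f"Saved mesh to {ply_path}")

A PLY exported this way can then be converted to OBJ or GLB with trimesh if those are the formats the Gradio file widgets should serve.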