mike23415 committed on
Commit
06be9c8
·
verified ·
1 Parent(s): ac27738

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -44
app.py CHANGED
@@ -4,32 +4,30 @@ import gradio as gr
4
  import numpy as np
5
  from PIL import Image
6
  import tempfile
7
- from transformers import AutoImageProcessor, AutoModel
8
  from tqdm.auto import tqdm
9
 
10
  # Check if CUDA is available, otherwise use CPU
11
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
  print(f"Using device: {device}")
13
 
14
- # Initialize the model
 
 
 
 
 
 
 
15
  print("Loading Shap-E model...")
16
- model_id = "openai/shap-e-img2img"
17
- processor = AutoImageProcessor.from_pretrained(model_id)
18
- model = AutoModel.from_pretrained(model_id).to(device)
19
 
20
  def preprocess_image(image):
21
- # Resize and center crop to 256x256
22
- width, height = image.size
23
- size = min(width, height)
24
- left = (width - size) // 2
25
- top = (height - size) // 2
26
- right = left + size
27
- bottom = top + size
28
- image = image.crop((left, top, right, bottom))
29
  image = image.resize((256, 256))
30
  return image
31
 
32
- def generate_3d_mesh(image, guidance_scale=15.0, num_inference_steps=64):
33
  """
34
  Convert a single image to a 3D model using Shap-E
35
  """
@@ -38,42 +36,62 @@ def generate_3d_mesh(image, guidance_scale=15.0, num_inference_steps=64):
38
 
39
  try:
40
  # Preprocess image
41
- image = preprocess_image(image)
42
 
43
- # Process image
44
- inputs = processor(images=image, return_tensors="pt").to(device)
45
 
46
  # Generate latents
47
- with torch.no_grad():
48
- latents = model.encode(inputs["pixel_values"]).latents
49
-
50
- # Decode the latents
51
- with torch.no_grad():
52
- with tqdm(total=num_inference_steps) as progress_bar:
53
- def callback(i, t, latents):
54
- progress_bar.update(1)
55
-
56
- sample = model.decode(
57
- latents,
58
- guidance_scale=guidance_scale,
59
- num_inference_steps=num_inference_steps,
60
- callback=callback
61
- )
62
 
63
- # Get mesh
64
- obj_mesh = sample.get_mesh()
65
- glb_mesh = sample.get_glb()
66
 
67
- # Save mesh to files
68
  with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as obj_file:
69
  obj_path = obj_file.name
70
- obj_mesh.write_obj(obj_path)
 
 
 
 
71
 
72
- with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as glb_file:
73
- glb_path = glb_file.name
74
- glb_file.write(glb_mesh)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- return [obj_path, glb_path], "3D model generated successfully!"
77
  except Exception as e:
78
  return None, f"Error: {str(e)}"
79
 
@@ -82,7 +100,7 @@ def process_image(image, guidance_scale, num_steps):
82
  if image is None:
83
  return None, None, "Please upload an image first."
84
 
85
- results, message = generate_3d_mesh(
86
  image,
87
  guidance_scale=guidance_scale,
88
  num_inference_steps=num_steps
@@ -109,13 +127,13 @@ with gr.Blocks(title="Image to 3D Model Converter") as demo:
109
 
110
  with gr.Column(scale=1):
111
  obj_file = gr.File(label="OBJ File (for editing)")
112
- glb_file = gr.File(label="GLB File (for Unity)")
113
  output_message = gr.Textbox(label="Output Message")
114
 
115
  submit_btn.click(
116
  fn=process_image,
117
  inputs=[input_image, guidance, num_steps],
118
- outputs=[obj_file, glb_file, output_message]
119
  )
120
 
121
  # Launch the app
 
4
  import numpy as np
5
  from PIL import Image
6
  import tempfile
 
7
  from tqdm.auto import tqdm
8
 
9
  # Check if CUDA is available, otherwise use CPU
10
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
  print(f"Using device: {device}")
12
 
13
+ # Import Shap-E related modules after installing them
14
+ print("Loading necessary modules...")
15
+ from shap_e.diffusion.sample import sample_latents
16
+ from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
17
+ from shap_e.models.download import load_model, load_config
18
+ from shap_e.util.image_util import load_image
19
+ from shap_e.util.notebooks import create_pan_cameras, decode_latent_mesh
20
+
21
  print("Loading Shap-E model...")
22
+ xm = load_model('transmitter', device=device)
23
+ diffusion = diffusion_from_config(load_config('diffusion'))
 
24
 
25
  def preprocess_image(image):
26
+ # Resize to match expected input size
 
 
 
 
 
 
 
27
  image = image.resize((256, 256))
28
  return image
29
 
30
+ def image_to_3d(image, guidance_scale=15.0, num_inference_steps=64):
31
  """
32
  Convert a single image to a 3D model using Shap-E
33
  """
 
36
 
37
  try:
38
  # Preprocess image
39
+ processed_image = preprocess_image(image)
40
 
41
+ # Convert PIL image to Shap-E format
42
+ shap_e_image = load_image(processed_image)
43
 
44
  # Generate latents
45
+ latents = sample_latents(
46
+ batch_size=1,
47
+ model=xm,
48
+ diffusion=diffusion,
49
+ guidance_scale=guidance_scale,
50
+ model_kwargs=dict(images=[shap_e_image]),
51
+ progress=True,
52
+ clip_denoised=True,
53
+ use_fp16=device.type == 'cuda',
54
+ use_karras=True,
55
+ karras_steps=num_inference_steps,
56
+ sigma_min=1e-3,
57
+ sigma_max=160,
58
+ s_churn=0,
59
+ )
60
 
61
+ # Create mesh
62
+ render_mode = 'nerf' # you can also use 'stf' for faster rendering
63
+ mesh = decode_latent_mesh(xm, latents[0], render_mode).tri_mesh()
64
 
65
+ # Save mesh to OBJ file
66
  with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as obj_file:
67
  obj_path = obj_file.name
68
+ with open(obj_path, 'w') as f:
69
+ for v in mesh.verts:
70
+ f.write(f'v {v[0]} {v[1]} {v[2]}\n')
71
+ for face in mesh.faces:
72
+ f.write(f'f {face[0]+1} {face[1]+1} {face[2]+1}\n')
73
 
74
+ # Save mesh to PLY file for better Unity compatibility
75
+ with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as ply_file:
76
+ ply_path = ply_file.name
77
+ with open(ply_path, 'w') as f:
78
+ f.write('ply\n')
79
+ f.write('format ascii 1.0\n')
80
+ f.write(f'element vertex {len(mesh.verts)}\n')
81
+ f.write('property float x\n')
82
+ f.write('property float y\n')
83
+ f.write('property float z\n')
84
+ f.write(f'element face {len(mesh.faces)}\n')
85
+ f.write('property list uchar int vertex_indices\n')
86
+ f.write('end_header\n')
87
+
88
+ for v in mesh.verts:
89
+ f.write(f'{v[0]} {v[1]} {v[2]}\n')
90
+
91
+ for face in mesh.faces:
92
+ f.write(f'3 {face[0]} {face[1]} {face[2]}\n')
93
 
94
+ return [obj_path, ply_path], "3D model generated successfully!"
95
  except Exception as e:
96
  return None, f"Error: {str(e)}"
97
 
 
100
  if image is None:
101
  return None, None, "Please upload an image first."
102
 
103
+ results, message = image_to_3d(
104
  image,
105
  guidance_scale=guidance_scale,
106
  num_inference_steps=num_steps
 
127
 
128
  with gr.Column(scale=1):
129
  obj_file = gr.File(label="OBJ File (for editing)")
130
+ ply_file = gr.File(label="PLY File (for Unity)")
131
  output_message = gr.Textbox(label="Output Message")
132
 
133
  submit_btn.click(
134
  fn=process_image,
135
  inputs=[input_image, guidance, num_steps],
136
+ outputs=[obj_file, ply_file, output_message]
137
  )
138
 
139
  # Launch the app