#!/usr/bin/env python
import pathlib
import gradio as gr
import matplotlib as mpl
import numpy as np
import PIL.Image
import spaces
import torch
from gradio_imageslider import ImageSlider
from transformers import DepthProForDepthEstimation, DepthProImageProcessorFast
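
# Load the DepthPro processor and model once at startup and keep the model on the
# GPU when one is available; CPU inference also works but is considerably slower.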
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
image_processor = DepthProImageProcessorFast.from_pretrained("apple/DepthPro-hf")
model = DepthProForDepthEstimation.from_pretrained("apple/DepthPro-hf").to(device)
# Grayscale colormap used to render the inverse depth map.
cmap = mpl.colormaps.get_cmap("gray")
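

# `@spaces.GPU(duration=20)` requests a ZeroGPU slot for up to 20 s per call on
# Hugging Face Spaces; `torch.inference_mode()` disables autograd bookkeeping so
# the forward pass runs faster and uses less memory.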
@spaces.GPU(duration=20)
@torch.inference_mode()
def run(image: PIL.Image.Image) -> tuple[tuple[PIL.Image.Image, PIL.Image.Image], str, str, str, str]:
    inputs = image_processor(images=image, return_tensors="pt").to(device)
    outputs = model(**inputs)
    post_processed_output = image_processor.post_process_depth_estimation(
        outputs, target_sizes=[(image.height, image.width)],
    )
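    # DepthPro predicts metric depth (in meters). The raw range is reported as-is,
    # while the visualization uses normalized inverse depth so that nearby structure
    # gets most of the dynamic range.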
    depth_raw = post_processed_output[0]["predicted_depth"]
    depth_min = depth_raw.min().item()
    depth_max = depth_raw.max().item()
    inverse_depth = 1 / depth_raw
    normalized_inverse_depth = (inverse_depth - inverse_depth.min()) / (inverse_depth.max() - inverse_depth.min())
    normalized_inverse_depth = normalized_inverse_depth * 255.0
    normalized_inverse_depth = normalized_inverse_depth.detach().cpu().numpy()
    normalized_inverse_depth = PIL.Image.fromarray(normalized_inverse_depth.astype("uint8"))
    # The uint8 map could be displayed directly as a grayscale image; applying the
    # "gray" colormap instead keeps the output in RGB and makes it trivial to swap
    # in another colormap (e.g. "Spectral_r").
    colored_inverse_depth = PIL.Image.fromarray(
        (cmap(np.array(normalized_inverse_depth))[:, :, :3] * 255).astype(np.uint8)
    )
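    # DepthPro also estimates the camera field of view and the corresponding focal
    # length (in pixels); both come back from the post-processing step.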
    field_of_view = post_processed_output[0]["field_of_view"].item()
    focal_length = post_processed_output[0]["focal_length"].item()
    return (
        (image, colored_inverse_depth),
        f"{field_of_view:.2f}",
        f"{focal_length:.2f}",
        f"{depth_min:.2f}",
        f"{depth_max:.2f}",
    )
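

# Gradio UI: image input and run button on the left; on the right, an ImageSlider
# comparing the original photo with the inverse-depth rendering, plus text boxes
# for the estimated field of view, focal length, and metric depth range.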
with gr.Blocks(css="style.css") as demo:
    gr.Markdown("# DepthPro")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil")
            run_button = gr.Button()
        with gr.Column():
            output_image = ImageSlider()
            with gr.Row():
                output_field_of_view = gr.Textbox(label="Field of View")
                output_focal_length = gr.Textbox(label="Focal Length")
                output_depth_min = gr.Textbox(label="Depth Min")
                output_depth_max = gr.Textbox(label="Depth Max")
    gr.Examples(
        examples=sorted(pathlib.Path("images").glob("*.jpg")),
        inputs=input_image,
        fn=run,
        outputs=[
            output_image,
            output_field_of_view,
            output_focal_length,
            output_depth_min,
            output_depth_max,
        ],
    )

    run_button.click(
        fn=run,
        inputs=input_image,
        outputs=[
            output_image,
            output_field_of_view,
            output_focal_length,
            output_depth_min,
            output_depth_max,
        ],
    )


if __name__ == "__main__":
    demo.queue().launch()