EEE515_Problem2 / app.py
joeWabbit's picture
Update app.py
411ded6 verified
raw
history blame
3.94 kB
import gradio as gr
import torch
import numpy as np
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
from PIL import Image, ImageFilter
def load_depth_model():
    """
    Load the depth-estimation processor and model once and cache them.

    The first call loads the checkpoint, moves the model to CUDA when it is
    available (CPU otherwise) and switches it to eval mode. The results are
    stored in the module-level globals ``processor``, ``model`` and ``device``;
    later calls return those cached objects.

    Returns:
        Tuple ``(processor, model, device)``.
    """
    global processor, model, device
    if "model" not in globals():
        # NOTE(review): confirm this repo id resolves to a transformers-compatible
        # checkpoint on the Hub; left unchanged here.
        checkpoint = "depth-anything/Depth-Anything-V2"
        loaded_processor = AutoImageProcessor.from_pretrained(checkpoint)
        loaded_model = AutoModelForDepthEstimation.from_pretrained(checkpoint)
        target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        loaded_model.to(target_device).eval()
        # Publish the globals only after everything succeeded, with ``model``
        # (the cache flag checked above) assigned last. A failure while moving
        # the model to the device therefore leaves no half-initialised cache
        # behind, and the next call simply retries the load.
        processor, device, model = loaded_processor, target_device, loaded_model
    return processor, model, device
def compute_depth_map(image: Image.Image, scale_factor: float) -> np.ndarray:
    """
    Estimate a per-pixel depth map for a PIL image.

    The model prediction is resized to the image resolution, min-max
    normalised, inverted, and finally multiplied by ``scale_factor``.

    Args:
        image: Input PIL image.
        scale_factor: Multiplier applied to the inverted, normalised map.

    Returns:
        NumPy array with values in ``[0, 1] * scale_factor``.
    """
    processor, model, device = load_depth_model()

    model_inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        raw_depth = model(**model_inputs).predicted_depth

    # PIL reports size as (width, height); interpolate expects (height, width).
    resized = torch.nn.functional.interpolate(
        raw_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    # Min-max normalise; the epsilon guards against a constant prediction.
    lowest, highest = resized.min(), resized.max()
    normalised = (resized - lowest) / (highest - lowest + 1e-8)

    # Always invert depth so that near=0 and far=1, then apply the scale in place.
    result = 1.0 - normalised.squeeze().cpu().numpy()
    result *= scale_factor
    return result
def layered_blur(image: Image.Image, depth_map: np.ndarray, num_layers: int, max_blur: float) -> Image.Image:
    """
    Blur ``image`` progressively according to ``depth_map``.

    ``num_layers`` Gaussian-blurred copies of the image are created with radii
    evenly spaced from 0 to ``max_blur``. The range ``[0, depth_map.max()]`` is
    split into ``num_layers`` equal bins, and each pixel takes its colour from
    the blurred copy whose bin contains its depth value.

    Because the bin edges are derived from ``depth_map.max()``, multiplying the
    whole depth map by a positive constant does not change which layer a pixel
    is assigned to.

    Args:
        image: Source PIL image.
        depth_map: 2-D array of depth values matching the image size.
        num_layers: Number of blur levels / depth bins; must be at least 1.
        max_blur: Gaussian radius of the most blurred layer.

    Returns:
        The composited PIL image.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1.
    """
    if num_layers < 1:
        # Without this check the function fails later with an opaque
        # IndexError when it looks up the last blurred layer.
        raise ValueError(f"num_layers must be at least 1, got {num_layers}")

    blur_radii = np.linspace(0, max_blur, num_layers)
    blur_versions = [image.filter(ImageFilter.GaussianBlur(radius)) for radius in blur_radii]

    thresholds = np.linspace(0, depth_map.max(), num_layers + 1)

    # The most blurred copy is the base layer. Bins are half-open
    # [lower, upper), so pixels exactly at depth_map.max() (and any value that
    # falls in no bin) keep this base layer.
    final_image = blur_versions[-1]

    # Only the sharper layers need compositing: pasting the last layer onto
    # itself would be a no-op. Bins are disjoint, so the order is irrelevant.
    for layer_image, lower, upper in zip(blur_versions[:-1], thresholds[:-1], thresholds[1:]):
        in_bin = (depth_map >= lower) & (depth_map < upper)
        mask_image = Image.fromarray(in_bin.astype(np.uint8) * 255, mode="L")
        final_image = Image.composite(layer_image, final_image, mask_image)
    return final_image
def process_depth_blur(uploaded_image, max_blur_value, scale_factor, num_layers):
    """
    Apply a depth-based blur to an uploaded image.

    The image is converted to RGB and resized to 512x512 (aspect ratio is not
    preserved), its depth map is computed, and a layered blur is applied.

    Args:
        uploaded_image: A PIL image, or a path / file object accepted by
            ``Image.open``.
        max_blur_value: Gaussian radius used for the most blurred layer.
        scale_factor: Multiplier forwarded to ``compute_depth_map``.
        num_layers: Number of blur layers; converted with ``int()``.

    Returns:
        The blurred 512x512 RGB PIL image.

    Raises:
        gr.Error: If no image was provided.
    """
    if uploaded_image is None:
        # Gradio passes None when the button is clicked before an upload;
        # report that to the user instead of failing inside Image.open.
        raise gr.Error("Please upload an image before processing.")

    if isinstance(uploaded_image, Image.Image):
        rgb_image = uploaded_image.convert("RGB")
    else:
        # convert() loads the pixel data and returns a new image, so the
        # underlying file can be closed as soon as the conversion is done.
        with Image.open(uploaded_image) as opened:
            rgb_image = opened.convert("RGB")

    image = rgb_image.resize((512, 512))
    # NOTE(review): layered_blur bins depth relative to depth_map.max(), so
    # scale_factor appears to have no effect on the result — confirm intent.
    depth_map = compute_depth_map(image, scale_factor)
    return layered_blur(image, depth_map, int(num_layers), max_blur_value)
# Gradio UI: one image input, three tuning sliders, one output image and a
# button that runs process_depth_blur on the current inputs.
with gr.Blocks() as demo:
    gr.Markdown("# Depth-Based Lens Blur")

    depth_img = gr.Image(label="Upload Image", type="pil")
    depth_max_blur = gr.Slider(
        minimum=1.0,
        maximum=5.0,
        step=0.1,
        value=3.0,
        label="Maximum Blur Radius",
    )
    depth_scale = gr.Slider(
        minimum=0.1,
        maximum=1.0,
        step=0.1,
        value=0.5,
        label="Depth Scale Factor",
    )
    depth_layers = gr.Slider(
        minimum=2,
        maximum=20,
        step=1,
        value=8,
        label="Number of Layers",
    )
    depth_out = gr.Image(label="Depth-Based Blurred Image")

    depth_button = gr.Button("Process Depth Blur")
    depth_button.click(
        fn=process_depth_blur,
        inputs=[depth_img, depth_max_blur, depth_scale, depth_layers],
        outputs=depth_out,
    )

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()