import gradio as gr
import torch
import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
from io import BytesIO

# Load models
image_processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")

def process_image(image, total_degrade_steps=15):
    # Convert to PIL if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Standardize size to 512x512
    image = image.resize((512, 512), Image.LANCZOS)
    # Prepare image for the model
    inputs = image_processor(images=image.convert('RGB'), return_tensors="pt")
    
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth
    
    # Interpolate to original size
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )
    print(f'total_degrade_steps {total_degrade_steps}')
    # Normalize depth map to [0, 1]
    normalized_depth = (prediction - prediction.min()) / (prediction.max() - prediction.min())
    normalized_depth = normalized_depth.squeeze().detach().cpu().numpy()
    
    # Convert original image to numpy array
    image_np = np.array(image)
    
    # Create a visualization of the depth map
    depth_visualization = (normalized_depth * 255).astype(np.uint8)
    depth_image = Image.fromarray(depth_visualization)
    
    # Create a copy of the original image to store the result
    result = np.copy(image_np)
    
    # Apply variable blur based on depth
    for i in range(total_degrade_steps):
        sigma = i * 2 + 1
        print(f'sigma: {sigma}')

        interval = 0.9 / total_degrade_steps
        closer = 0.9 - (i * interval)
        further = 0.9 - ((i + 1) * interval)
        
        mask = (normalized_depth > further) & (normalized_depth <= closer)
        print(f'closer: {closer}, further: {further}')
        
        if np.any(mask):
            try:            # Apply Gaussian blur with current kernel size
                blurred = cv2.GaussianBlur(image_np, (sigma, sigma), 0)
                
                # # Copy blurred pixels to the result where mask is True
                # mask_3d = np.stack([mask, mask, mask], axis=2) if len(image_np.shape) == 3 else mask
                # result = np.where(mask_3d, blurred, result)

                mask_3d = np.stack([mask, mask, mask], axis=2)
                result[mask_3d] = blurred[mask_3d]
            except Exception as e:
                print(f"Error applying blur with kernel size {sigma}: {e}")
                continue
    
    # Convert result back to PIL Image
    result_image = Image.fromarray(result.astype(np.uint8))
    print(f'result_image size {result_image.size}')
    
    # # Create side-by-side comparison
    # combined_width = image.width * 2
    # combined_height = image.height
    
    # combined_image = Image.new('RGB', (combined_width, combined_height))
    # combined_image.paste(image, (0, 0))
    # combined_image.paste(result_image, (image.width, 0))


    return depth_image, result_image

# Create Gradio interface
with gr.Blocks(title="Depth-Based Blur Effect") as demo:
    gr.Markdown("# Depth-Based Blur Effect")
    gr.Markdown("This app applies variable Gaussian blur to images based on depth estimation. Objects farther from the camera appear more blurred, while closer objects remain sharper.")
    
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Image")         
            total_steps = gr.Slider(minimum=5, maximum=20, value=15, step=1, label="Total Blur Levels")
            # show_depth = gr.Checkbox(value=True, label="Show Depth Map")
            submit_btn = gr.Button("Apply Depth-Based Blur")
        
        with gr.Column():
            depth_map = gr.Image(type="pil", label="Depth Map")  # Added format="png"
            output_image = gr.Image(type="numpy", label="Result (Original | Blurred)") 
    
    submit_btn.click(
        process_image,
        inputs=[input_image, total_steps],
        outputs=[depth_map, output_image]
    )
    
    gr.Examples(
        examples=[
            ["assets/sample.jpg"],
        ],
        inputs=input_image
    )

    gr.Markdown("""
    ## How it works
    
    1. The app uses the Depth-Anything-V2-Small model to estimate depth in the image
    2. Depth values are normalized to a range of 0-1
    3. A variable Gaussian blur is applied based on depth values
    4. Objects farther from the camera (higher depth values) receive stronger blur
    5. Objects closer to the camera (lower depth values) remain sharper
    
    This creates a realistic depth-of-field effect similar to what's seen in photography.
    """)

# Launch the app
demo.launch()