import cv2
import gradio as gr
import os
from PIL import Image
import numpy as np
import torch
from torch.autograd import Variable
from torchvision import transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
import warnings
import time
warnings.filterwarnings("ignore")

# Fetch the upstream DIS sources and flatten DIS/IS-Net into the working
# directory so that the `data_loader_cache` and `models` imports below resolve.
# The guard makes this a once-only step: after a previous run the module file
# is already in place, so re-cloning (which fails on the existing DIS/
# directory) and re-moving (which finds nothing to move) are skipped.
# NOTE(review): assumes `data_loader_cache` is a single .py file at the top of
# IS-Net; if it is not, the guard is always true and both commands run every
# time, exactly as before.
if not os.path.isfile("data_loader_cache.py"):
    os.system("git clone https://github.com/xuebinqin/DIS")
    os.system("mv DIS/IS-Net/* .")

# project imports
from data_loader_cache import normalize, im_reader, im_preprocess 
from models import *

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Put the checkpoint where `hypar["model_path"]` / `hypar["restore_model"]`
# point. Nothing is downloaded here: `isnet.pth` must already be present,
# either in the working directory or inside saved_models/.
os.makedirs("saved_models", exist_ok=True)
_weights_dst = os.path.join("saved_models", "isnet.pth")
# Move the file whenever it is still in the working directory and not yet in
# place, even when saved_models/ existed before this run; never overwrite a
# checkpoint that is already there.
if os.path.isfile("isnet.pth") and not os.path.exists(_weights_dst):
    os.replace("isnet.pth", _weights_dst)

class GOSNormalize(object):
    """Callable transform that normalizes an image with fixed statistics.

    The actual work is delegated to `normalize` (imported from
    `data_loader_cache`), which is called with the image and the stored
    `mean` and `std`.
    """

    def __init__(self, mean=None, std=None):
        """Remember the statistics that are handed to `normalize` on each call.

        Args:
            mean: Sequence of means (presumably one per channel). Defaults to
                [0.485, 0.456, 0.406].
            std: Sequence of standard deviations (presumably one per channel).
                Defaults to [0.229, 0.224, 0.225].
        """
        # Build the defaults per instance instead of using list literals in
        # the signature: a mutable default is shared by every instance, so
        # editing one instance's `mean` would silently change all the others.
        self.mean = [0.485, 0.456, 0.406] if mean is None else mean
        self.std = [0.229, 0.224, 0.225] if std is None else std

    def __call__(self, image):
        """Return `normalize(image, self.mean, self.std)`."""
        return normalize(image, self.mean, self.std)

# Preprocessing pipeline applied by load_image: a single normalization step
# with mean 0.5 and std 1.0 for each of the three values.
transform = transforms.Compose(
    [GOSNormalize(mean=[0.5] * 3, std=[1.0] * 3)]
)

def load_image(im_path, hypar):
    """Read an image from disk and turn it into a batched network input.

    Args:
        im_path: Path handed to `im_reader`.
        hypar: Settings dict; only `hypar["cache_size"]` is read here and is
            forwarded to `im_preprocess`.

    Returns:
        A pair `(image_batch, shape_batch)`: the normalized image tensor and
        the shape reported by `im_preprocess`, each with a leading dimension
        of size 1 added.
    """
    raw = im_reader(im_path)
    # im_preprocess returns the tensor plus a shape
    # (presumably the original height/width — confirm in data_loader_cache).
    prepared, reported_shape = im_preprocess(raw, hypar["cache_size"])

    # Scale by 1/255 before the normalization transform
    # (assumes values come back in the 0..255 range — TODO confirm).
    scaled = torch.divide(prepared, 255.0)

    image_batch = transform(scaled).unsqueeze(0)
    shape_batch = torch.from_numpy(np.array(reported_shape)).unsqueeze(0)
    return image_batch, shape_batch

def build_model(hypar, device):
    """Prepare the network stored in `hypar["model"]` for inference.

    Args:
        hypar: Settings dict. Reads `model`, `model_digit`, `restore_model`
            and, when a checkpoint is requested, `model_path`.
        device: Target device passed to `.to()` and used as `map_location`.

    Returns:
        The same module object, moved to `device`, optionally restored from a
        checkpoint, and switched to eval mode.
    """
    model = hypar["model"]

    if hypar["model_digit"] == "half":
        # Cast everything to half precision, then put the batch-norm layers
        # back to float32.
        model.half()
        batch_norms = (
            module for module in model.modules()
            if isinstance(module, torch.nn.BatchNorm2d)
        )
        for bn in batch_norms:
            bn.float()

    model.to(device)

    checkpoint_name = hypar["restore_model"]
    if checkpoint_name != "":
        checkpoint_path = os.path.join(hypar["model_path"], checkpoint_name)
        state = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(state)
        model.to(device)

    model.eval()
    return model

def predict(net, inputs_val, shapes_val, hypar, device):
    """Run the network on one batch and return an 8-bit mask.

    Args:
        net: The network. `net(x)[0][0]` must be a 4-D tensor; its first
            item is resized and returned.
        inputs_val: Input tensor; cast to float32 when
            `hypar["model_digit"] == "full"`, otherwise to float16.
        shapes_val: Indexable whose `[0][0]` and `[0][1]` give the output
            size passed to the resize (presumably height, width of the
            original image — confirm against `load_image`).
        hypar: Settings dict; only `model_digit` is read here.
        device: Device the input is moved to before the forward pass.

    Returns:
        A `numpy.uint8` array holding the min-max normalized prediction
        scaled to 0..255, resized to the requested size.
    """
    net.eval()
    if hypar["model_digit"] == "full":
        inputs_val = inputs_val.type(torch.FloatTensor)
    else:
        inputs_val = inputs_val.type(torch.HalfTensor)

    # Plain ints rather than 0-d tensors for the target size.
    target_size = (int(shapes_val[0][0]), int(shapes_val[0][1]))

    # Inference only: without no_grad the forward pass records an autograd
    # graph that is never used and just holds on to memory.
    with torch.no_grad():
        ds_val = net(inputs_val.to(device))[0]
        pred_val = ds_val[0][0, :, :, :]
        # F.interpolate replaces the deprecated F.upsample; align_corners=False
        # is what upsample fell back to, so the result is unchanged.
        pred_val = F.interpolate(torch.unsqueeze(pred_val, 0),
                                 size=target_size,
                                 mode='bilinear',
                                 align_corners=False)
        # Drop only the two leading singleton dimensions. A bare squeeze()
        # would also remove a height or width of 1 and change the mask's rank.
        pred_val = pred_val.squeeze(0).squeeze(0)

        ma = torch.max(pred_val)
        mi = torch.min(pred_val)
        # The epsilon keeps a constant prediction from dividing by zero.
        pred_val = (pred_val - mi) / (ma - mi + 1e-8)

    # Also matches torch.device objects and indexed names such as 'cuda:0'.
    if str(device).startswith('cuda'):
        torch.cuda.empty_cache()
    return (pred_val.detach().cpu().numpy() * 255).astype(np.uint8)

# Inference settings shared by load_image, build_model and predict.
hypar = dict(
    # build_model loads the checkpoint at <model_path>/<restore_model>;
    # an empty restore_model skips loading.
    model_path="./saved_models",
    restore_model="isnet.pth",
    # Not read by the code visible in this file.
    interm_sup=False,
    # "full": predict feeds float32 inputs. "half": build_model casts the
    # network to half precision and predict feeds float16 inputs.
    model_digit="full",
    # Not read by the code visible in this file.
    seed=0,
    # Forwarded to im_preprocess by load_image.
    cache_size=[1024, 1024],
    # Not read by the code visible in this file.
    input_size=[1024, 1024],
    crop_size=[1024, 1024],
    # The network instance that build_model prepares.
    model=ISNetDIS(),
)

# Build the network once at import time; inference() reuses it for every request.
net = build_model(hypar, device)

def inference(file_paths, logs):
    """Remove the background from up to 3 uploaded images.

    Args:
        file_paths: List of image file paths from the uploader; may be empty
            or None.
        logs: Log text accumulated so far (None is treated as empty).

    Returns:
        A triple `(images, logs, logs)`: the RGBA results whose alpha channel
        is the predicted mask, and the updated log text twice (once for the
        state output, once for the log textbox).
    """
    start_time = time.time()
    logs = logs or ""
    if not file_paths:
        logs += "No images to process.\n"
        return [], logs, logs

    # Limit to a maximum of 3 images, and say so instead of dropping the
    # extra files silently.
    max_images = 3
    image_paths = file_paths[:max_images]
    if len(file_paths) > max_images:
        logs += (
            f"Received {len(file_paths)} files; "
            f"only the first {max_images} will be processed.\n"
        )

    processed_images = []
    for path in image_paths:
        image_tensor, orig_size = load_image(path, hypar)
        mask = predict(net, image_tensor, orig_size, hypar, device)
        pil_mask = Image.fromarray(mask).convert('L')
        # The context manager releases the file handle deterministically.
        # convert() already returns a new image, so no extra copy is needed
        # before attaching the alpha channel.
        with Image.open(path) as source_image:
            im_rgba = source_image.convert("RGB")
        im_rgba.putalpha(pil_mask)
        processed_images.append(im_rgba)

    elapsed = round(time.time() - start_time, 2)
    logs += f"Processed {len(processed_images)} image(s) in {elapsed} second(s).\n"
    return processed_images, logs, logs

# Static text passed to gr.Interface as its title, description and article.
title = "Highly Accurate Dichotomous Image Segmentation"

# Fragments are joined with no separator, so each one carries its own
# trailing space or <br>.
description = "".join([
    "This is an unofficial demo for DIS, a model that removes the background from images. ",
    "Upload up to 3 images at once using the file uploader below. ",
    "GitHub: https://github.com/xuebinqin/DIS<br>",
    "Telegram bot: https://t.me/restoration_photo_bot<br>",
    "[![](https://img.shields.io/twitter/follow/DoEvent?label=@DoEvent&style=social)](https://twitter.com/DoEvent)",
])

# Visitor-badge markup.
article = (
    "<div><center>"
    "<img src='https://visitor-badge.glitch.me/badge?page_id=max_skobeev_dis_cmp_public' alt='visitor badge'>"
    "</center></div>"
)

# Describe the UI: a multi-file uploader plus a state slot feed `inference`,
# whose three return values fill the gallery, the state slot and the log box.
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.File(file_count="multiple", type="filepath", label="Upload Images (up to 3)"),
        gr.State(),
    ],
    outputs=[
        gr.Gallery(label="Output (Background Removed)"),
        gr.State(),
        gr.Textbox(label="Logs", lines=6),
    ],
    # Each example row supplies the file list and a None placeholder for the state input.
    examples=[
        [["robot.png"], None],
        [["robot.png", "ship.png"], None],
    ],
    title=title,
    description=description,
    article=article,
    flagging_mode="never",
    cache_mode="lazy",
)

# Enable the request queue, then start the app. `interface` holds whatever
# launch() returns, not the gr.Interface object itself.
interface = demo.queue().launch(show_api=True, show_error=True)