"""Gradio demo that removes image backgrounds with the DIS (IS-Net) model."""

import os
import shutil
import time
import warnings

import cv2
import gradio as gr
from PIL import Image
import numpy as np
import torch
from torch.autograd import Variable
from torchvision import transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt

warnings.filterwarnings("ignore")

# Fetch the DIS sources only when they are not already in the working
# directory, so a restart does not re-run the clone and the move.
if not os.path.exists("data_loader_cache.py"):
    os.system("git clone https://github.com/xuebinqin/DIS")
    os.system("mv DIS/IS-Net/* .")

# project imports (these modules come from the files moved out of DIS/IS-Net)
from data_loader_cache import normalize, im_reader, im_preprocess
from models import *

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Stage the weights: nothing is downloaded here, an `isnet.pth` already sitting
# in the working directory is simply moved under `saved_models/`.
os.makedirs("saved_models", exist_ok=True)
_staged_weights = os.path.join("saved_models", "isnet.pth")
if os.path.exists("isnet.pth") and not os.path.exists(_staged_weights):
    shutil.move("isnet.pth", _staged_weights)


class GOSNormalize(object):
    """Callable transform that applies `normalize` with a fixed mean and std."""

    def __init__(self, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        # Copy into fresh lists: avoids sharing a mutable default between
        # instances while still handing `normalize` the list type it got before.
        self.mean = list(mean)
        self.std = list(std)

    def __call__(self, image):
        return normalize(image, self.mean, self.std)


transform = transforms.Compose([GOSNormalize([0.5, 0.5, 0.5], [1.0, 1.0, 1.0])])


def load_image(im_path, hypar):
    """Read an image from disk and turn it into a model-ready batch of one.

    Args:
        im_path: Path of the image file, passed to `im_reader`.
        hypar: Settings dict; only `hypar["cache_size"]` is read here.

    Returns:
        A pair `(image, shape)`: the preprocessed image scaled by 1/255 and
        passed through `transform`, and the shape reported by `im_preprocess`
        (presumably the original height and width -- confirm against
        `data_loader_cache`). Both carry a leading batch dimension of size 1.
    """
    im = im_reader(im_path)
    im, im_shp = im_preprocess(im, hypar["cache_size"])
    im = torch.divide(im, 255.0)
    shape = torch.from_numpy(np.array(im_shp))
    return transform(im).unsqueeze(0), shape.unsqueeze(0)


def build_model(hypar, device):
    """Prepare the network in `hypar["model"]` for inference on `device`.

    Args:
        hypar: Settings dict; reads `model`, `model_digit`, `model_path` and
            `restore_model`.
        device: Device the network is moved to and the weights are mapped to.

    Returns:
        The network, in eval mode, with weights restored when
        `hypar["restore_model"]` is a non-empty string.
    """
    net = hypar["model"]

    if hypar["model_digit"] == "half":
        net.half()
        # Batch-norm layers are switched back to float32 after the half cast.
        for layer in net.modules():
            if isinstance(layer, torch.nn.BatchNorm2d):
                layer.float()

    net.to(device)

    if hypar["restore_model"] != "":
        weights_path = os.path.join(hypar["model_path"], hypar["restore_model"])
        net.load_state_dict(torch.load(weights_path, map_location=device))

    net.eval()
    return net


def predict(net, inputs_val, shapes_val, hypar, device):
    """Run the network on one image batch and return an 8-bit mask.

    Args:
        net: The network to evaluate.
        inputs_val: Input batch; only the first item's prediction is used.
        shapes_val: Tensor whose `[0][0]` and `[0][1]` entries give the size
            the prediction is resized to.
        hypar: Settings dict; `model_digit` selects float32 ("full") or
            float16 input.
        device: Device on which the forward pass runs.

    Returns:
        A `uint8` NumPy array with the prediction min-max scaled to 0..255.
    """
    net.eval()

    if hypar["model_digit"] == "full":
        inputs_val = inputs_val.float()
    else:
        inputs_val = inputs_val.half()

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        ds_val = net(inputs_val.to(device))[0]
        pred_val = ds_val[0][0, :, :, :]

        # Plain ints for the target size instead of 0-dim tensors.
        target_size = (int(shapes_val[0][0]), int(shapes_val[0][1]))
        pred_val = torch.squeeze(
            F.interpolate(
                torch.unsqueeze(pred_val, 0),
                size=target_size,
                mode='bilinear',
                align_corners=False,
            )
        )

        # Min-max normalise; the epsilon guards against a constant prediction.
        ma = torch.max(pred_val)
        mi = torch.min(pred_val)
        pred_val = (pred_val - mi) / (ma - mi + 1e-8)

    # Also matches "cuda:0" and torch.device values, not just the bare string.
    if str(device).startswith("cuda"):
        torch.cuda.empty_cache()

    return (pred_val.detach().cpu().numpy() * 255).astype(np.uint8)


# Parameters
hypar = {
    "model_path": "./saved_models",
    "restore_model": "isnet.pth",
    "interm_sup": False,
    "model_digit": "full",
    "seed": 0,
    "cache_size": [1024, 1024],
    "input_size": [1024, 1024],
    "crop_size": [1024, 1024],
    "model": ISNetDIS(),
}

net = build_model(hypar, device)


def inference(file_paths, logs):
    """Remove the background from up to three uploaded images.

    Args:
        file_paths: Paths of the uploaded files; may be empty or None. Only
            the first three are processed.
        logs: Log text accumulated so far, or None on the first call.

    Returns:
        A triple `(images, logs, logs)`: the RGBA results whose alpha channel
        is the predicted mask, followed by the updated log text twice (once
        for the state output and once for the textbox).
    """
    start_time = time.time()
    logs = logs or ""

    if not file_paths:
        logs += "No images to process.\n"
        return [], logs, logs

    processed_images = []
    # Limit to a maximum of 3 images
    for path in file_paths[:3]:
        image_tensor, orig_size = load_image(path, hypar)
        mask = predict(net, image_tensor, orig_size, hypar, device)
        pil_mask = Image.fromarray(mask).convert('L')

        # `convert` returns a new image, so the file handle can be closed
        # right away and no extra copy is needed.
        with Image.open(path) as src:
            im_rgba = src.convert("RGB")
        im_rgba.putalpha(pil_mask)
        processed_images.append(im_rgba)

    elapsed = round(time.time() - start_time, 2)
    logs += f"Processed {len(processed_images)} image(s) in {elapsed} second(s).\n"
    return processed_images, logs, logs


title = "Highly Accurate Dichotomous Image Segmentation"
# The line breaks inside this text are written as "\n" escapes so that the
# literals stay valid Python.
description = (
    "This is an unofficial demo for DIS, a model that removes the background from images. "
    "Upload up to 3 images at once using the file uploader below. "
    "GitHub: https://github.com/xuebinqin/DIS\n"
    "Telegram bot: https://t.me/restoration_photo_bot\n"
    "[![](https://img.shields.io/twitter/follow/DoEvent?label=@DoEvent&style=social)](https://twitter.com/DoEvent)"
)
# NOTE(review): only a line break is present in this string -- confirm whether
# additional markup was intended here.
article = "\n"

interface = gr.Interface(
    fn=inference,
    inputs=[
        gr.File(file_count="multiple", type="filepath", label="Upload Images (up to 3)"),
        gr.State(),
    ],
    outputs=[
        gr.Gallery(label="Output (Background Removed)"),
        gr.State(),
        gr.Textbox(label="Logs", lines=6),
    ],
    examples=[
        [["robot.png"], None],
        [["robot.png", "ship.png"], None],
    ],
    title=title,
    description=description,
    article=article,
    flagging_mode="never",
    cache_mode="lazy",
).queue().launch(show_api=True, show_error=True)