import gradio as gr from PIL import Image from torchvision.transforms import Compose, ToTensor, Resize, Normalize import numpy as np import imageio import tempfile from utils.utils import denorm from model.hub import MultiInputResShiftHub import torch device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = MultiInputResShiftHub.from_pretrained("vfontech/Multiple-Input-Resshift-VFI") model.requires_grad_(False).to(device).eval() transform = Compose([ Resize((256, 448)), ToTensor(), Normalize(mean=[0.5]*3, std=[0.5]*3), ]) def to_numpy(img_tensor: torch.Tensor) -> np.ndarray: img_np = denorm(img_tensor, mean=[0.5]*3, std=[0.5]*3).squeeze().permute(1, 2, 0).cpu().numpy() img_np = np.clip(img_np, 0, 1) return (img_np * 255).astype(np.uint8) def interpolate(img0_pil: Image.Image, img2_pil: Image.Image, tau: float=0.5, num_samples: int=1) -> tuple: img0 = transform(img0_pil.convert("RGB")).unsqueeze(0).to(device) img2 = transform(img2_pil.convert("RGB")).unsqueeze(0).to(device) try: if num_samples == 1: # Unique image img1 = model.reverse_process([img0, img2], tau) return Image.fromarray(to_numpy(img1)), None else: # Múltiples imágenes → video frames = [to_numpy(img0)] for t in np.linspace(0, 1, num_samples): img = model.reverse_process([img0, img2], float(t)) frames.append(to_numpy(img)) frames.append(to_numpy(img2)) temp_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name imageio.mimsave(temp_path, frames, fps=8) return None, temp_path except Exception as e: print(f"Error during interpolation: {e}") return None, None def build_demo() -> gr.Blocks: header = """
Efficient and stochastic video frame interpolation for hand-drawn animation.
Usage:
Number of Samples = 1
, generates a single interpolated frame using Tau.Number of Samples > 1
, Tau is ignored and a full interpolation sequence is generated.