"""Gradio demo for optimisation-based neural style transfer on VGG19 features."""

import functools

import gradio as gr
import numpy as np
import torch
import torchvision
from PIL import Image
from torch import optim
from torchvision import transforms

device = "cuda" if torch.cuda.is_available() else "cpu"

# Per-channel normalisation statistics. preprocess() applies them and
# deprocess() undoes them, so both must read the same values.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Side length (pixels) every input image is resized to before optimisation.
IMAGE_SIZE = 196

# Optimisation hyperparameters used by predict().
LEARNING_RATE = 0.06
CONTENT_WEIGHT = 1
STYLE_WEIGHT = 1e2
NUM_STEPS = 60

# Layer whose activations are compared for the content loss.
CONTENT_LAYER = 'layer_4'


@functools.lru_cache(maxsize=1)
def create_vgg_model():
    """Return the frozen convolutional part of a pretrained VGG19.

    The result is cached so the weights are loaded only once per process
    instead of on every request. All parameters have ``requires_grad``
    disabled; only the input image is optimised.

    Returns:
        The ``features`` sub-module of torchvision's VGG19.
    """
    model_weights = torchvision.models.VGG19_Weights.DEFAULT
    model = torchvision.models.vgg19(weights=model_weights)
    for param in model.parameters():
        param.requires_grad = False
    return model.features


def preprocess(img):
    """Turn an image array into a normalised tensor with a batch dimension.

    Args:
        img: Array accepted by ``PIL.Image.fromarray``.

    Returns:
        Tensor of shape ``(1, 3, IMAGE_SIZE, IMAGE_SIZE)``.
    """
    image = Image.fromarray(img).convert('RGB')
    transform = transforms.Compose([
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
    ])
    return transform(image).unsqueeze(dim=0)


def deprocess(image):
    """Undo ``preprocess``: return a channels-last array clipped to [0, 1].

    Args:
        image: Tensor whose leading dimension (of size one) is squeezed away,
            leaving a ``(C, H, W)`` tensor.

    Returns:
        NumPy array of shape ``(H, W, C)`` with values in ``[0, 1]``.
    """
    image = image.clone().squeeze(0).permute(1, 2, 0)
    image = image.cpu().detach().numpy()
    image = image * np.array(NORMALIZE_STD) + np.array(NORMALIZE_MEAN)
    return image.clip(0, 1)


def get_features(image, model):
    """Run ``image`` through ``model`` and collect selected activations.

    Args:
        image: Input tensor fed to the first child module of ``model``.
        model: Module whose direct children are applied in order.

    Returns:
        Dict mapping ``'layer_1'`` .. ``'layer_5'`` to the output of the
        child modules named ``'0'``, ``'5'``, ``'10'``, ``'19'`` and ``'28'``.
    """
    layers = {
        '0': 'layer_1',
        '5': 'layer_2',
        '10': 'layer_3',
        '19': 'layer_4',
        '28': 'layer_5',
    }
    features = {}
    x = image
    # NOTE(review): every child is executed, even those after the last tapped
    # layer. torchvision's VGG appears to use in-place ReLU, so stopping early
    # could change the stored activations -- confirm before "optimising" this.
    for name, layer in model._modules.items():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features


def gram_matrix(image):
    """Return the (unnormalised) Gram matrix of a feature map.

    Args:
        image: 4-D tensor ``(b, c, h, w)``. The reshape ignores ``b``, so
            this only works for a batch of one.

    Returns:
        Tensor of shape ``(c, c)``.
    """
    b, c, h, w = image.size()
    flat = image.view(c, h * w)
    return torch.mm(flat, flat.t())


def content_loss(target, content):
    """Return the mean squared difference between two feature tensors."""
    return torch.mean((target - content) ** 2)


def style_loss(target_features, style_grams):
    """Sum, over all layers, the mean squared Gram-matrix difference.

    Args:
        target_features: Dict of layer name -> activation of the image being
            optimised.
        style_grams: Dict of layer name -> precomputed style Gram matrix; must
            contain every key of ``target_features``.

    Returns:
        Scalar tensor (or ``0`` when ``target_features`` is empty).
    """
    loss = 0
    for layer in target_features:
        target_gram = gram_matrix(target_features[layer])
        loss += torch.mean((target_gram - style_grams[layer]) ** 2)
    return loss


def total_loss(content_loss, style_loss, alpha, beta):
    """Return ``alpha * content_loss + beta * style_loss``."""
    return alpha * content_loss + beta * style_loss


def predict(content_image, style_image):
    """Stylise ``content_image`` with the style of ``style_image``.

    Starts from a copy of the content image and optimises its pixels with
    Adam for ``NUM_STEPS`` steps against the weighted content + style loss.

    Args:
        content_image: Image array supplied by Gradio.
        style_image: Image array supplied by Gradio.

    Returns:
        The stylised result as a ``PIL.Image.Image``.

    Raises:
        gr.Error: If either image is missing.
    """
    # Gradio passes None for an empty image slot; fail with a readable message
    # instead of an opaque error from Image.fromarray.
    if content_image is None or style_image is None:
        raise gr.Error("Please provide both a content image and a style image.")

    model = create_vgg_model().to(device).eval()
    content_img = preprocess(content_image).to(device)
    style_img = preprocess(style_image).to(device)
    target_img = content_img.clone().requires_grad_(True)

    # The reference features are constants of the optimisation, so no
    # autograd bookkeeping is needed for them.
    with torch.no_grad():
        content_features = get_features(content_img, model)
        style_features = get_features(style_img, model)
        style_gram = {
            layer: gram_matrix(style_features[layer])
            for layer in style_features
        }

    optimizer = optim.Adam([target_img], lr=LEARNING_RATE)
    for _ in range(NUM_STEPS):
        target_features = get_features(target_img, model)
        c_loss = content_loss(
            target_features[CONTENT_LAYER], content_features[CONTENT_LAYER]
        )
        s_loss = style_loss(target_features, style_gram)
        t_loss = total_loss(c_loss, s_loss, CONTENT_WEIGHT, STYLE_WEIGHT)
        optimizer.zero_grad()
        t_loss.backward()
        optimizer.step()

    results = deprocess(target_img)
    return Image.fromarray((results * 255).astype(np.uint8))


title = "Neural Style Transfer 🎨"
demo = gr.Interface(
    fn=predict,
    inputs=['image', 'image'],
    outputs=gr.Image(),
    title=title,
)
demo.launch(debug=False, share=False)