|
from functools import lru_cache

import gradio as gr
import numpy as np
import torch
import torchvision
from PIL import Image
from torch import optim
from torchvision import transforms
|
|
|
# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
|
|
|
def create_vgg_model():
    """Return the frozen feature-extraction part of a pretrained VGG-19.

    The network is built with torchvision's default VGG-19 weights, every
    parameter is marked as not requiring gradients, and only the ``features``
    sub-module is handed back to the caller.
    """
    vgg = torchvision.models.vgg19(
        weights=torchvision.models.VGG19_Weights.DEFAULT
    )
    # Freeze the whole network before slicing off the part we keep.
    vgg.requires_grad_(False)
    return vgg.features
|
|
|
def preprocess(img, imsize=196):
    """Turn an image array into a normalized, batched tensor.

    Args:
        img: Array accepted by ``PIL.Image.fromarray``.
        imsize: Side length, in pixels, of the square the image is resized
            to. Defaults to 196, the value that used to be hard-coded.

    Returns:
        Tensor produced by resizing to ``(imsize, imsize)``, converting with
        ``ToTensor``, normalizing per channel, and adding a leading batch
        dimension.
    """
    pipeline = transforms.Compose([
        transforms.Resize((imsize, imsize)),
        transforms.ToTensor(),
        # Same per-channel statistics that deprocess() undoes.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # Force three channels so grayscale / RGBA inputs match the 3-value mean/std.
    image = Image.fromarray(img).convert('RGB')
    return pipeline(image).unsqueeze(dim=0)
|
|
|
def deprocess(image):
    """Convert a normalized batched tensor back into a displayable array.

    Args:
        image: Tensor whose leading dimension is squeezed away and whose
            remaining three dimensions are channel-first with 3 channels.

    Returns:
        NumPy array in channel-last layout with the per-channel
        normalization undone and values clipped to ``[0, 1]``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Work on a copy, drop the batch axis and move channels last.
    pixels = image.clone().squeeze(0).permute(1, 2, 0).cpu().detach().numpy()

    # Invert Normalize: x * std + mean, broadcast over the channel axis.
    return np.clip(pixels * channel_std + channel_mean, 0, 1)
|
|
|
def get_features(image, model, layers=None):
    """Run ``image`` through ``model`` and collect selected intermediate outputs.

    Args:
        image: Input fed to the first direct child of ``model``.
        model: Module whose direct children are applied one after another in
            registration order (for example an ``nn.Sequential``).
        layers: Optional mapping from child-module name to the key under
            which that child's output is stored. When omitted, children
            ``'0'``, ``'5'``, ``'10'``, ``'19'`` and ``'28'`` are stored as
            ``'layer_1'`` through ``'layer_5'``.

    Returns:
        Dict mapping each requested key to the output of the matching child.
    """
    if layers is None:
        layers = {
            '0': 'layer_1',
            '5': 'layer_2',
            '10': 'layer_3',
            '19': 'layer_4',
            '28': 'layer_5',
        }

    features = {}
    x = image
    # Deliberately no early exit after the last requested layer: the stored
    # tensors are not copied, so a following child that works in place still
    # affects what callers see.
    # NOTE(review): torchvision's VGG uses nn.ReLU(inplace=True) -- confirm
    # for the model in use before adding a shortcut here.
    for name, layer in model.named_children():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features
|
|
|
def gram_matrix(image):
    """Return the Gram matrix of a single feature map.

    Args:
        image: 4-D tensor of shape ``(1, c, h, w)``. A batch size other than
            1 is not supported and makes the reshape below raise.

    Returns:
        ``(c, c)`` tensor holding the inner products between the flattened
        channels of ``image``.
    """
    _, c, h, w = image.size()
    # reshape (not view) so non-contiguous inputs, e.g. channels_last or
    # permuted feature maps, are handled instead of raising.
    flat = image.reshape(c, h * w)
    return torch.mm(flat, flat.t())
|
|
|
def content_loss(target, content):
    """Return the mean squared difference between ``target`` and ``content``."""
    difference = target - content
    return difference.pow(2).mean()
|
|
|
def style_loss(target_features, style_grams):
    """Sum the per-layer mean squared Gram-matrix differences.

    Args:
        target_features: Mapping from layer key to the target image's
            feature map for that layer.
        style_grams: Mapping from the same layer keys to the precomputed
            Gram matrix of the style image.

    Returns:
        Sum over all layers in ``target_features`` of the mean squared
        difference between the target's Gram matrix and the style Gram
        matrix (``0`` when ``target_features`` is empty).
    """
    return sum(
        torch.mean((gram_matrix(target_features[name]) - style_grams[name]) ** 2)
        for name in target_features
    )
|
|
|
def total_loss(content_loss, style_loss, alpha, beta):
    """Combine the two losses as ``alpha * content_loss + beta * style_loss``."""
    weighted_content = alpha * content_loss
    weighted_style = beta * style_loss
    return weighted_content + weighted_style
|
|
|
@lru_cache(maxsize=1)
def _load_feature_extractor():
    """Build the frozen VGG feature extractor once and reuse it across calls."""
    return create_vgg_model().to(device).eval()


def predict(content_image, style_image, epochs=60, lr=0.06, alpha=1, beta=1e2):
    """Blend the style of one image into the content of another.

    Args:
        content_image: Array holding the image whose content is kept.
        style_image: Array holding the image whose style is transferred.
        epochs: Number of optimisation steps applied to the output image.
        lr: Learning rate of the Adam optimiser.
        alpha: Weight of the content loss in the total loss.
        beta: Weight of the style loss in the total loss.

    Returns:
        ``PIL.Image.Image`` with the stylised result.

    Raises:
        ValueError: If either image is ``None``.
    """
    if content_image is None or style_image is None:
        raise ValueError("Both a content image and a style image are required.")

    # Loading the pretrained network is expensive, so it is cached rather
    # than rebuilt on every request.
    model = _load_feature_extractor()

    content_img = preprocess(content_image).to(device)
    style_img = preprocess(style_image).to(device)

    # The optimised image starts as a copy of the content image; it is the
    # only tensor the optimiser updates.
    target_img = content_img.clone().requires_grad_(True)

    content_features = get_features(content_img, model)
    style_features = get_features(style_img, model)
    style_gram = {
        name: gram_matrix(feature) for name, feature in style_features.items()
    }

    optimizer = optim.Adam([target_img], lr=lr)

    for _ in range(epochs):
        target_features = get_features(target_img, model)
        c_loss = content_loss(target_features['layer_4'], content_features['layer_4'])
        s_loss = style_loss(target_features, style_gram)
        t_loss = total_loss(c_loss, s_loss, alpha, beta)

        optimizer.zero_grad()
        t_loss.backward()
        optimizer.step()

    results = deprocess(target_img)
    # deprocess() yields values in [0, 1]; scale to 8-bit for PIL.
    return Image.fromarray((results * 255).astype(np.uint8))
|
|
|
# "\U0001F3A8" is the artist-palette emoji. The title previously ended in the
# mojibake "π¨", which is what that emoji's UTF-8 bytes look like when decoded
# as ISO-8859-7; the escape keeps the literal independent of file encoding.
title = "Neural Style Transfer \U0001F3A8"

# Two image inputs (content, style) are passed to predict(); its return value
# is shown in a single image output.
demo = gr.Interface(
    fn=predict,
    inputs=['image', 'image'],
    outputs=gr.Image(),
    title=title,
)

demo.launch(debug=False, share=False)
|
|
|
|