""" | |
Based on: https://github.com/crowsonkb/k-diffusion | |
Copyright (c) 2022 Katherine Crowson | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal | |
in the Software without restriction, including without limitation the rights | |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
copies of the Software, and to permit persons to whom the Software is | |
furnished to do so, subject to the following conditions: | |
The above copyright notice and this permission notice shall be included in | |
all copies or substantial portions of the Software. | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
THE SOFTWARE. | |
""" | |
import numpy as np
import torch as th

from .gaussian_diffusion import GaussianDiffusion, mean_flat

class KarrasDenoiser:
    def __init__(self, sigma_data: float = 0.5):
        self.sigma_data = sigma_data

    def get_snr(self, sigmas):
        return sigmas**-2

    def get_sigmas(self, sigmas):
        return sigmas

    def get_scalings(self, sigma):
        # Preconditioning coefficients from Karras et al. (2022): c_skip and
        # c_out mix the skip connection with the network output, and c_in
        # scales the network input toward unit variance.
        c_skip = self.sigma_data**2 / (sigma**2 + self.sigma_data**2)
        c_out = sigma * self.sigma_data / (sigma**2 + self.sigma_data**2) ** 0.5
        c_in = 1 / (sigma**2 + self.sigma_data**2) ** 0.5
        return c_skip, c_out, c_in
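
    # Illustrative sketch (not part of the original module): under the usual
    # Gaussian assumptions (x_0 with std sigma_data, noise with std sigma),
    # these scalings keep both the network input c_in * x_t and the regression
    # target (x_0 - c_skip * x_t) / c_out at unit variance.
    def _check_scalings_example(self, sigma=1.7):
        sigma = th.tensor(sigma, dtype=th.float64)
        c_skip, c_out, c_in = self.get_scalings(sigma)
        total_var = sigma**2 + self.sigma_data**2
        assert th.isclose(c_in, total_var**-0.5)  # unit-variance input
        # Variance of the target, treating x_0 and the noise as independent:
        target_var = (
            (1 - c_skip) ** 2 * self.sigma_data**2 + c_skip**2 * sigma**2
        ) / c_out**2
        assert th.isclose(target_var, th.tensor(1.0, dtype=th.float64))
        return c_skip, c_out, c_in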
    def training_losses(self, model, x_start, sigmas, model_kwargs=None, noise=None):
        if model_kwargs is None:
            model_kwargs = {}
        if noise is None:
            noise = th.randn_like(x_start)

        terms = {}

        dims = x_start.ndim
        x_t = x_start + noise * append_dims(sigmas, dims)
        c_skip, c_out, _ = [append_dims(x, dims) for x in self.get_scalings(sigmas)]
        model_output, denoised = self.denoise(model, x_t, sigmas, **model_kwargs)
        target = (x_start - c_skip * x_t) / c_out
        terms["mse"] = mean_flat((model_output - target) ** 2)
        terms["xs_mse"] = mean_flat((denoised - x_start) ** 2)

        # No variational-bound term is computed above, so the "vb" branch is a
        # no-op unless a subclass adds one; the loss is just the MSE term.
        if "vb" in terms:
            terms["loss"] = terms["mse"] + terms["vb"]
        else:
            terms["loss"] = terms["mse"]

        return terms
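
    # Illustrative sketch (not part of the original module): one training-loss
    # evaluation against a hypothetical model that predicts all zeros.
    def _training_losses_example(self):
        x_start = th.randn(4, 3, 32, 32) * self.sigma_data
        sigmas = th.rand(4) * 10 + 0.01  # arbitrary per-sample noise levels
        zero_model = lambda x, t: th.zeros_like(x)
        terms = self.training_losses(zero_model, x_start, sigmas)
        assert terms["loss"].shape == (4,)  # one scalar loss per batch element
        return terms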
    def denoise(self, model, x_t, sigmas, **model_kwargs):
        c_skip, c_out, c_in = [append_dims(x, x_t.ndim) for x in self.get_scalings(sigmas)]
        # Condition the model on 0.25 * log(sigma), rescaled to roughly match
        # the 0-1000 timestep range of DDPM-style models; the epsilon avoids
        # log(0) at the final (zero) noise level.
        rescaled_t = 1000 * 0.25 * th.log(sigmas + 1e-44)
        model_output = model(c_in * x_t, rescaled_t, **model_kwargs)
        denoised = c_out * model_output + c_skip * x_t
        return model_output, denoised
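
    # Illustrative sketch (not part of the original module): with a model that
    # predicts zeros, denoise() reduces to the pure skip connection c_skip * x_t.
    def _denoise_example(self):
        x_t = th.randn(2, 3, 16, 16)
        sigmas = th.tensor([1.0, 5.0])
        zero_model = lambda x, t: th.zeros_like(x)
        _, denoised = self.denoise(zero_model, x_t, sigmas)
        c_skip, _, _ = [append_dims(s, x_t.ndim) for s in self.get_scalings(sigmas)]
        assert th.allclose(denoised, c_skip * x_t)
        return denoised
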
class GaussianToKarrasDenoiser:
    def __init__(self, model, diffusion):
        from scipy import interpolate

        self.model = model
        self.diffusion = diffusion
        self.alpha_cumprod_to_t = interpolate.interp1d(
            diffusion.alphas_cumprod, np.arange(0, diffusion.num_timesteps)
        )

    def sigma_to_t(self, sigma):
        # For a variance-preserving diffusion, sigma = sqrt((1 - alpha_bar) / alpha_bar),
        # so alpha_bar = 1 / (sigma**2 + 1); invert it to a (fractional) timestep,
        # clamping to the ends of the discrete schedule.
        alpha_cumprod = 1.0 / (sigma**2 + 1)
        if alpha_cumprod > self.diffusion.alphas_cumprod[0]:
            return 0
        elif alpha_cumprod <= self.diffusion.alphas_cumprod[-1]:
            return self.diffusion.num_timesteps - 1
        else:
            return float(self.alpha_cumprod_to_t(alpha_cumprod))
    def denoise(self, x_t, sigmas, clip_denoised=True, model_kwargs=None):
        t = th.tensor(
            [self.sigma_to_t(sigma) for sigma in sigmas.cpu().numpy()],
            dtype=th.long,
            device=sigmas.device,
        )
        c_in = append_dims(1.0 / (sigmas**2 + 1) ** 0.5, x_t.ndim)
        out = self.diffusion.p_mean_variance(
            self.model, x_t * c_in, t, clip_denoised=clip_denoised, model_kwargs=model_kwargs
        )
        return None, out["pred_xstart"]

def karras_sample(*args, **kwargs):
    last = None
    for x in karras_sample_progressive(*args, **kwargs):
        last = x["x"]
    return last
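
# Illustrative usage sketch (not part of the original module). The zero model
# and shape below are hypothetical stand-ins; any callable with the (x, t)
# signature expected by KarrasDenoiser.denoise would work the same way.
def _karras_sample_example():
    diffusion = KarrasDenoiser()
    zero_model = lambda x, t: th.zeros_like(x)
    sample = karras_sample(
        diffusion,
        zero_model,
        shape=(1, 3, 64, 64),
        steps=8,
        device="cpu",
        sampler="heun",
    )
    assert sample.shape == (1, 3, 64, 64)
    return sample
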
def karras_sample_progressive(
    diffusion,
    model,
    shape,
    steps,
    clip_denoised=True,
    progress=False,
    model_kwargs=None,
    device=None,
    sigma_min=0.002,
    sigma_max=80,  # higher for highres?
    rho=7.0,
    sampler="heun",
    s_churn=0.0,
    s_tmin=0.0,
    s_tmax=float("inf"),
    s_noise=1.0,
    guidance_scale=0.0,
):
    # Guard for the KarrasDenoiser path below, which splats model_kwargs.
    if model_kwargs is None:
        model_kwargs = {}
    sigmas = get_sigmas_karras(steps, sigma_min, sigma_max, rho, device=device)
    x_T = th.randn(*shape, device=device) * sigma_max
    sample_fn = {"heun": sample_heun, "dpm": sample_dpm, "ancestral": sample_euler_ancestral}[
        sampler
    ]

    if sampler != "ancestral":
        sampler_args = dict(s_churn=s_churn, s_tmin=s_tmin, s_tmax=s_tmax, s_noise=s_noise)
    else:
        sampler_args = {}
    if isinstance(diffusion, KarrasDenoiser):

        def denoiser(x_t, sigma):
            _, denoised = diffusion.denoise(model, x_t, sigma, **model_kwargs)
            if clip_denoised:
                denoised = denoised.clamp(-1, 1)
            return denoised

    elif isinstance(diffusion, GaussianDiffusion):
        model = GaussianToKarrasDenoiser(model, diffusion)

        def denoiser(x_t, sigma):
            _, denoised = model.denoise(
                x_t, sigma, clip_denoised=clip_denoised, model_kwargs=model_kwargs
            )
            return denoised

    else:
        raise NotImplementedError

    if guidance_scale != 0 and guidance_scale != 1:
        # Classifier-free guidance: run the doubled batch through the denoiser
        # (the model is expected to treat the first half as conditional and the
        # second half as unconditional, typically via model_kwargs), then
        # extrapolate from the unconditional toward the conditional prediction.
        def guided_denoiser(x_t, sigma):
            x_t = th.cat([x_t, x_t], dim=0)
            sigma = th.cat([sigma, sigma], dim=0)
            x_0 = denoiser(x_t, sigma)
            cond_x_0, uncond_x_0 = th.split(x_0, len(x_0) // 2, dim=0)
            x_0 = uncond_x_0 + guidance_scale * (cond_x_0 - uncond_x_0)
            return x_0

    else:
        guided_denoiser = denoiser
    for obj in sample_fn(
        guided_denoiser,
        x_T,
        sigmas,
        progress=progress,
        **sampler_args,
    ):
        if isinstance(diffusion, GaussianDiffusion):
            yield diffusion.unscale_out_dict(obj)
        else:
            yield obj

def get_sigmas_karras(n, sigma_min, sigma_max, rho=7.0, device="cpu"):
    """Constructs the noise schedule of Karras et al. (2022)."""
    ramp = th.linspace(0, 1, n)
    min_inv_rho = sigma_min ** (1 / rho)
    max_inv_rho = sigma_max ** (1 / rho)
    sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
    return append_zero(sigmas).to(device)
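
# Illustrative sketch (not part of the original module): the schedule
# interpolates between sigma_max and sigma_min in sigma**(1/rho) space, so it
# concentrates steps at low noise levels; append_zero pins the final sigma to 0.
def _get_sigmas_karras_example():
    sigmas = get_sigmas_karras(10, sigma_min=0.002, sigma_max=80.0)
    assert sigmas.shape == (11,)  # n noise levels plus the appended zero
    assert th.isclose(sigmas[0], th.tensor(80.0)) and sigmas[-1] == 0
    assert (sigmas[:-1] > sigmas[1:]).all()  # strictly decreasing
    return sigmas
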
def to_d(x, sigma, denoised):
    """Converts a denoiser output to a Karras ODE derivative."""
    return (x - denoised) / append_dims(sigma, x.ndim)

def get_ancestral_step(sigma_from, sigma_to):
    """Calculates the noise level (sigma_down) to step down to and the amount
    of noise to add (sigma_up) when doing an ancestral sampling step."""
    sigma_up = (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5
    sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5
    return sigma_down, sigma_up
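
# Illustrative sketch (not part of the original module): the split preserves
# total variance at the target noise level, i.e.
# sigma_down**2 + sigma_up**2 == sigma_to**2 by construction.
def _get_ancestral_step_example(sigma_from=2.0, sigma_to=1.0):
    sigma_down, sigma_up = get_ancestral_step(sigma_from, sigma_to)
    assert abs(sigma_down**2 + sigma_up**2 - sigma_to**2) < 1e-12
    return sigma_down, sigma_up
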
def sample_euler_ancestral(model, x, sigmas, progress=False):
    """Ancestral sampling with Euler method steps."""
    s_in = x.new_ones([x.shape[0]])
    indices = range(len(sigmas) - 1)
    if progress:
        from tqdm.auto import tqdm

        indices = tqdm(indices)
    for i in indices:
        denoised = model(x, sigmas[i] * s_in)
        sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1])
        yield {"x": x, "i": i, "sigma": sigmas[i], "sigma_hat": sigmas[i], "pred_xstart": denoised}
        d = to_d(x, sigmas[i], denoised)
        # Euler method
        dt = sigma_down - sigmas[i]
        x = x + d * dt
        x = x + th.randn_like(x) * sigma_up
    yield {"x": x, "pred_xstart": x}

def sample_heun(
    denoiser,
    x,
    sigmas,
    progress=False,
    s_churn=0.0,
    s_tmin=0.0,
    s_tmax=float("inf"),
    s_noise=1.0,
):
    """Implements Algorithm 2 (Heun steps) from Karras et al. (2022)."""
    s_in = x.new_ones([x.shape[0]])
    indices = range(len(sigmas) - 1)
    if progress:
        from tqdm.auto import tqdm

        indices = tqdm(indices)
    for i in indices:
        gamma = (
            min(s_churn / (len(sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.0
        )
        eps = th.randn_like(x) * s_noise
        sigma_hat = sigmas[i] * (gamma + 1)
        if gamma > 0:
            x = x + eps * (sigma_hat**2 - sigmas[i] ** 2) ** 0.5
        denoised = denoiser(x, sigma_hat * s_in)
        d = to_d(x, sigma_hat, denoised)
        yield {"x": x, "i": i, "sigma": sigmas[i], "sigma_hat": sigma_hat, "pred_xstart": denoised}
        dt = sigmas[i + 1] - sigma_hat
        if sigmas[i + 1] == 0:
            # Euler method
            x = x + d * dt
        else:
            # Heun's method
            x_2 = x + d * dt
            denoised_2 = denoiser(x_2, sigmas[i + 1] * s_in)
            d_2 = to_d(x_2, sigmas[i + 1], denoised_2)
            d_prime = (d + d_2) / 2
            x = x + d_prime * dt
    yield {"x": x, "pred_xstart": denoised}
def sample_dpm(
    denoiser,
    x,
    sigmas,
    progress=False,
    s_churn=0.0,
    s_tmin=0.0,
    s_tmax=float("inf"),
    s_noise=1.0,
):
    """A sampler inspired by DPM-Solver-2 and Algorithm 2 from Karras et al. (2022)."""
    s_in = x.new_ones([x.shape[0]])
    indices = range(len(sigmas) - 1)
    if progress:
        from tqdm.auto import tqdm

        indices = tqdm(indices)
    for i in indices:
        gamma = (
            min(s_churn / (len(sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.0
        )
        eps = th.randn_like(x) * s_noise
        sigma_hat = sigmas[i] * (gamma + 1)
        if gamma > 0:
            x = x + eps * (sigma_hat**2 - sigmas[i] ** 2) ** 0.5
        denoised = denoiser(x, sigma_hat * s_in)
        d = to_d(x, sigma_hat, denoised)
        yield {"x": x, "i": i, "sigma": sigmas[i], "sigma_hat": sigma_hat, "denoised": denoised}
        # Midpoint method, where the midpoint is chosen according to a rho=3 Karras schedule
        sigma_mid = ((sigma_hat ** (1 / 3) + sigmas[i + 1] ** (1 / 3)) / 2) ** 3
        dt_1 = sigma_mid - sigma_hat
        dt_2 = sigmas[i + 1] - sigma_hat
        x_2 = x + d * dt_1
        denoised_2 = denoiser(x_2, sigma_mid * s_in)
        d_2 = to_d(x_2, sigma_mid, denoised_2)
        x = x + d_2 * dt_2
    yield {"x": x, "pred_xstart": denoised}

def append_dims(x, target_dims):
    """Appends dimensions to the end of a tensor until it has target_dims dimensions."""
    dims_to_append = target_dims - x.ndim
    if dims_to_append < 0:
        raise ValueError(f"input has {x.ndim} dims but target_dims is {target_dims}, which is less")
    return x[(...,) + (None,) * dims_to_append]
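
# Illustrative sketch (not part of the original module): append_dims turns a
# batch of per-sample sigmas into a shape that broadcasts over the remaining
# tensor dimensions.
def _append_dims_example():
    sigmas = th.tensor([1.0, 2.0])  # shape (2,)
    x = th.zeros(2, 3, 64, 64)
    expanded = append_dims(sigmas, x.ndim)
    assert expanded.shape == (2, 1, 1, 1)  # broadcasts against x
    return x * expanded
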
def append_zero(x):
    return th.cat([x, x.new_zeros([1])])