import os
import random

import cv2 as cv
import gradio as gr
import numpy as np
import spaces
import torch
from controlnet_aux import HEDdetector, LineartDetector, MLSDdetector, NormalBaeDetector
from diffusers import (
    ControlNetModel,
    DiffusionPipeline,
    DPMSolverMultistepScheduler,
    PNDMScheduler,
    StableDiffusionControlNetImg2ImgPipeline,
    StableDiffusionControlNetPipeline,
    StableDiffusionPipeline,
)
from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import (
    rescale_noise_cfg,
    retrieve_timesteps,
)
from diffusers.utils import load_image, make_image_grid
from diffusers.utils.torch_utils import randn_tensor
from peft import LoraConfig, PeftModel
from PIL import Image  # `Image.fromarray` is used below but was never imported
from transformers import pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision only when a GPU is available; full precision on CPU.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

default_model = 'CompVis/stable-diffusion-v1-4'
LoRA_path = 'new_model'

# Model id that selects the local LoRA adapters on top of `default_model`.
_LORA_MODEL_ID = 'Maria_Lashina_LoRA'

# Upper bound for randomly drawn seeds. `infer` reads this name, but it is not
# defined in the visible part of the file; a later module-level assignment
# (if any) simply overrides this default, since it is looked up at call time.
MAX_SEED = np.iinfo(np.int32).max

# IP-Adapter checkpoint location.
# NOTE(review): the original weight name was "ip-adapter-plus_sd14.bin"; the
# h94/IP-Adapter "models" folder appears to publish only "*_sd15" checkpoints,
# so the name was corrected — confirm against the repository listing.
_IP_ADAPTER_REPO = "h94/IP-Adapter"
_IP_ADAPTER_SUBFOLDER = "models"
_IP_ADAPTER_WEIGHTS = "ip-adapter-plus_sd15.bin"

# UI label -> ControlNet checkpoint on the Hugging Face Hub.
CONTROLNET_MODE = {
    "Canny Edge Detection": "lllyasviel/control_v11p_sd15_canny",
    "Pixel to Pixel": "lllyasviel/control_v11e_sd15_ip2p",
    "HED edge detection (soft edge)": "lllyasviel/control_sd15_hed",
    "Midas depth estimation": "lllyasviel/control_v11f1p_sd15_depth",
    "Surface Normal Estimation": "lllyasviel/control_v11p_sd15_normalbae",
    "Scribble-Based Generation": "lllyasviel/control_v11p_sd15_scribble",
    "Line Art Generation": "lllyasviel/control_v11p_sd15_lineart",
}


def get_pipe(model_id, use_controlnet, controlnet_mode, use_ip_adapter):
    """Build a Stable Diffusion pipeline for the requested feature combination.

    Args:
        model_id: Base checkpoint id. The special value 'Maria_Lashina_LoRA'
            loads `default_model` and then applies the PEFT adapters stored
            under `LoRA_path` to the UNet and the text encoder.
        use_controlnet: When true, build a `StableDiffusionControlNetPipeline`
            with the ControlNet selected by `controlnet_mode`.
        controlnet_mode: Key of `CONTROLNET_MODE`; only read when
            `use_controlnet` is true.
        use_ip_adapter: When true, load the IP-Adapter weights into the pipe.

    Returns:
        The pipeline, already moved to `device`.

    Raises:
        KeyError: If `use_controlnet` is true and `controlnet_mode` is not a
            key of `CONTROLNET_MODE`.
    """
    if use_controlnet and use_ip_adapter:
        print('Pipe with ControlNet and IPAdapter')
    elif use_controlnet:
        print('Pipe with ControlNet')
    elif use_ip_adapter:
        print('Pipe with IpAdapter')
    else:
        print('Pipe with only SD')

    base_model = default_model if model_id == _LORA_MODEL_ID else model_id

    if use_controlnet:
        # Load the ControlNet in the same dtype as the rest of the pipeline
        # (the original hard-coded float16, which mismatches on CPU).
        controlnet = ControlNetModel.from_pretrained(
            CONTROLNET_MODE[controlnet_mode],
            cache_dir="./models_cache",
            torch_dtype=torch_dtype,
        )
        # Pass the loaded model itself, not the boolean flag.
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            base_model,
            torch_dtype=torch_dtype,
            controlnet=controlnet,
            safety_checker=None,
        )
    else:
        pipe = StableDiffusionPipeline.from_pretrained(
            base_model,
            torch_dtype=torch_dtype,
            safety_checker=None,
        )
    pipe = pipe.to(device)

    if use_ip_adapter:
        pipe.load_ip_adapter(
            _IP_ADAPTER_REPO,
            subfolder=_IP_ADAPTER_SUBFOLDER,
            weight_name=_IP_ADAPTER_WEIGHTS,
        )

    if model_id == _LORA_MODEL_ID:
        adapter_name = 'a cartoonish mouse'
        unet_sub_dir = os.path.join(LoRA_path, "unet")
        text_encoder_sub_dir = os.path.join(LoRA_path, "text_encoder")
        pipe.unet = PeftModel.from_pretrained(
            pipe.unet, unet_sub_dir, adapter_name=adapter_name
        )
        pipe.text_encoder = PeftModel.from_pretrained(
            pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
        )
        if torch_dtype == torch.float16:
            # Cast the wrapped modules back to half precision after the
            # adapters have been attached.
            pipe.unet.half()
            pipe.text_encoder.half()

    return pipe


def _gray_to_rgb_image(gray):
    """Stack a single-channel (H, W) array three times and wrap it as a PIL image."""
    return Image.fromarray(np.repeat(gray[:, :, None], 3, axis=2))


def prepare_controlnet_image(controlnet_image, mode):
    """Turn the user-supplied image into the conditioning image for `mode`.

    Args:
        controlnet_image: Input picture (PIL image or array-like).
        mode: One of the `CONTROLNET_MODE` keys. Modes without a dedicated
            preprocessor (e.g. "Pixel to Pixel") return the input unchanged.

    Returns:
        The conditioning image to hand to the ControlNet pipeline. (The
        original implementation computed it but never returned it.)
    """
    if mode == "Canny Edge Detection":
        # cv.Canny needs an ndarray; np.asarray also accepts PIL images.
        edges = cv.Canny(np.asarray(controlnet_image), 80, 160)
        return _gray_to_rgb_image(edges)

    if mode == "HED edge detection (soft edge)":
        processor = HEDdetector.from_pretrained('lllyasviel/Annotators')
        return processor(controlnet_image)

    if mode == "Midas depth estimation":
        depth_estimator = pipeline('depth-estimation')
        # The transformers image pipelines take PIL images (or paths/URLs),
        # not raw arrays, so convert array input first.
        source = controlnet_image
        if not isinstance(source, (str, Image.Image)):
            source = Image.fromarray(np.asarray(source))
        depth = np.array(depth_estimator(source)['depth'])
        return _gray_to_rgb_image(depth)

    if mode == "Surface Normal Estimation":
        processor = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
        return processor(controlnet_image)

    if mode == "Scribble-Based Generation":
        processor = HEDdetector.from_pretrained('lllyasviel/Annotators')
        return processor(controlnet_image, scribble=True)

    if mode == "Line Art Generation":
        processor = LineartDetector.from_pretrained("lllyasviel/Annotators")
        return processor(controlnet_image)

    return controlnet_image


# @spaces.GPU #[uncomment to use ZeroGPU]
def infer(
    model_id,
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    use_controlnet,
    controlnet_strength,
    controlnet_mode,
    controlnet_image,
    use_ip_adapter,
    ip_adapter_scale,
    ip_adapter_image,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image and return it together with the seed that was used.

    The same prompt, negative prompt, guidance scale, step count, size and
    generator are passed to the pipeline in every mode; ControlNet and
    IP-Adapter inputs are added on top when the corresponding flag is set.
    (The original per-mode branches each forwarded a different subset of
    these settings and referenced the undefined names `control_strength`
    and `image_upload_ip`.)

    Args:
        model_id: Forwarded to `get_pipe`.
        prompt, negative_prompt: Text conditioning.
        seed: Seed for the generator; replaced by a random value in
            [0, MAX_SEED] when `randomize_seed` is true.
        randomize_seed: Draw a fresh seed instead of using `seed`.
        width, height: Requested output size.
        guidance_scale, num_inference_steps: Sampler settings.
        use_controlnet, controlnet_strength, controlnet_mode, controlnet_image:
            ControlNet switch, conditioning scale, preprocessor/checkpoint
            selector and source image.
        use_ip_adapter, ip_adapter_scale, ip_adapter_image: IP-Adapter switch,
            scale and reference image.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        Tuple `(image, seed)`.

    Raises:
        gr.Error: If a feature is enabled but its image input is missing.
    """
    if use_controlnet and controlnet_image is None:
        raise gr.Error("ControlNet is enabled but no control image was provided.")
    if use_ip_adapter and ip_adapter_image is None:
        raise gr.Error("IP-Adapter is enabled but no reference image was provided.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator().manual_seed(seed)

    call_kwargs = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "guidance_scale": guidance_scale,
        "num_inference_steps": num_inference_steps,
        "width": width,
        "height": height,
        "generator": generator,
    }

    if use_controlnet:
        call_kwargs["image"] = prepare_controlnet_image(controlnet_image, controlnet_mode)
        call_kwargs["controlnet_conditioning_scale"] = controlnet_strength

    pipe = get_pipe(model_id, use_controlnet, controlnet_mode, use_ip_adapter)

    if use_ip_adapter:
        pipe.set_ip_adapter_scale(ip_adapter_scale)
        call_kwargs["ip_adapter_image"] = ip_adapter_image

    image = pipe(**call_kwargs).images[0]
    return image, seed