import numpy as np
import torch
import cv2 as cv
import random
import os
import spaces
import gradio as gr

from transformers import pipeline
from controlnet_aux import MLSDdetector, HEDdetector, NormalBaeDetector, LineartDetector
from peft import PeftModel, LoraConfig
from diffusers import (
    DiffusionPipeline,
    StableDiffusionPipeline,
    StableDiffusionControlNetPipeline,
    StableDiffusionControlNetImg2ImgPipeline,
    DPMSolverMultistepScheduler,
    PNDMScheduler,
    ControlNetModel
)
from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import rescale_noise_cfg, retrieve_timesteps
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.utils.torch_utils import randn_tensor
from diffusers.utils import load_image, make_image_grid

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Half precision on CUDA, full precision on CPU.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Base checkpoint that get_pipe loads in place of the 'Maria_Lashina_LoRA'
# pseudo model id before attaching the LoRA adapters.
default_model = 'CompVis/stable-diffusion-v1-4'

# Directory with the LoRA adapter weights; get_pipe reads its "unet" and
# "text_encoder" sub-directories.
LoRA_path = 'new_model'

# Maps the ControlNet mode name (as passed around in `controlnet_mode`) to the
# model repository id handed to ControlNetModel.from_pretrained.
CONTROLNET_MODE = {
    # Edge / line based conditioning.
    "Canny Edge Detection": "lllyasviel/control_v11p_sd15_canny",
    # No preprocessing branch exists for this mode in prepare_controlnet_image;
    # the input image is used as given.
    "Pixel to Pixel": "lllyasviel/control_v11e_sd15_ip2p",
    # NOTE(review): the only id here that does not follow the `control_v11*`
    # naming of the other entries -- verify that this repository id exists.
    "HED edge detection (soft edge)": "lllyasviel/control_sd15_hed",
    # Geometry based conditioning.
    "Midas depth estimation": "lllyasviel/control_v11f1p_sd15_depth",
    "Surface Normal Estimation": "lllyasviel/control_v11p_sd15_normalbae",
    # Sketch based conditioning.
    "Scribble-Based Generation": "lllyasviel/control_v11p_sd15_scribble",
    "Line Art Generation": "lllyasviel/control_v11p_sd15_lineart",
}

def get_pipe(
    model_id,
    use_controlnet,
    controlnet_mode,
    use_ip_adapter
):
    """Build the Stable Diffusion pipeline for the requested feature set.

    Args:
        model_id: Model repository id to load. The special value
            'Maria_Lashina_LoRA' loads ``default_model`` and then attaches the
            LoRA adapters stored under ``LoRA_path``.
        use_controlnet: When truthy, build a ControlNet pipeline using the
            model registered for ``controlnet_mode`` in ``CONTROLNET_MODE``.
        controlnet_mode: Key of ``CONTROLNET_MODE``; only read when
            ``use_controlnet`` is truthy.
        use_ip_adapter: When truthy, load the IP-Adapter weights into the
            pipeline.

    Returns:
        The pipeline, moved to ``device``.

    Raises:
        KeyError: If ``use_controlnet`` is truthy and ``controlnet_mode`` is
            not a key of ``CONTROLNET_MODE``.
    """
    is_lora = model_id == 'Maria_Lashina_LoRA'
    base_model = default_model if is_lora else model_id

    if use_controlnet:
        print('Pipe with ControlNet and IPAdapter' if use_ip_adapter else 'Pipe with ControlNet')

        # Load the ControlNet in the same dtype as the rest of the pipeline so
        # that a float32 (CPU) pipeline is not paired with float16 weights.
        controlnet = ControlNetModel.from_pretrained(
            CONTROLNET_MODE[controlnet_mode],
            cache_dir="./models_cache",
            torch_dtype=torch_dtype,
        )

        # Pass the loaded model itself, not the boolean flag.
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            base_model,
            torch_dtype=torch_dtype,
            controlnet=controlnet,
            safety_checker=None,
        ).to(device)
    else:
        print('Pipe with IpAdapter' if use_ip_adapter else 'Pipe with only SD')

        pipe = StableDiffusionPipeline.from_pretrained(
            base_model,
            torch_dtype=torch_dtype,
            safety_checker=None,
        ).to(device)

    if use_ip_adapter:
        # NOTE(review): confirm that this weight file exists in the
        # h94/IP-Adapter repository (its SD weights are usually named *_sd15*).
        pipe.load_ip_adapter(
            "h94/IP-Adapter",
            subfolder="models",
            weight_name="ip-adapter-plus_sd14.bin",
        )

    if is_lora:
        adapter_name = 'a cartoonish mouse'
        unet_sub_dir = os.path.join(LoRA_path, "unet")
        text_encoder_sub_dir = os.path.join(LoRA_path, "text_encoder")

        pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
        pipe.text_encoder = PeftModel.from_pretrained(
            pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
        )

        # Cast the wrapped modules back to half precision when the pipeline
        # itself runs in float16.
        if torch_dtype == torch.float16:
            pipe.unet.half()
            pipe.text_encoder.half()

    return pipe

def prepare_controlnet_image(controlnet_image, mode):
    """Turn the user's image into the conditioning image for a ControlNet mode.

    Args:
        controlnet_image: Source image. The Canny branch converts it to a
            NumPy array and the depth branch converts a NumPy array to a PIL
            image; the other branches hand it to the detector unchanged.
        mode: Name of the ControlNet mode (a key of ``CONTROLNET_MODE``).

    Returns:
        The preprocessed conditioning image. For modes without a preprocessing
        step (for example "Pixel to Pixel") ``controlnet_image`` is returned
        unchanged.
    """
    if mode == "Canny Edge Detection":
        # Imported locally: ``Image`` is not imported at file level.
        from PIL import Image

        # cv.Canny works on arrays, so convert PIL input first.
        edges = cv.Canny(np.array(controlnet_image), 80, 160)
        # Replicate the single edge channel into three channels.
        return Image.fromarray(np.repeat(edges[:, :, None], 3, axis=2))

    if mode == "HED edge detection (soft edge)":
        processor = HEDdetector.from_pretrained('lllyasviel/Annotators')
        return processor(controlnet_image)

    if mode == "Midas depth estimation":
        from PIL import Image

        # The transformers image pipelines take PIL images (or paths/URLs),
        # so wrap raw arrays before calling the estimator.
        if isinstance(controlnet_image, np.ndarray):
            controlnet_image = Image.fromarray(controlnet_image)

        depth_estimator = pipeline('depth-estimation')
        depth = np.array(depth_estimator(controlnet_image)['depth'])
        depth = depth[:, :, None]
        return Image.fromarray(np.concatenate([depth, depth, depth], axis=2))

    if mode == "Surface Normal Estimation":
        processor = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
        return processor(controlnet_image)

    if mode == "Scribble-Based Generation":
        processor = HEDdetector.from_pretrained('lllyasviel/Annotators')
        return processor(controlnet_image, scribble=True)

    if mode == "Line Art Generation":
        processor = LineartDetector.from_pretrained("lllyasviel/Annotators")
        return processor(controlnet_image)

    # No preprocessing is defined for this mode: use the image as given.
    return controlnet_image

# @spaces.GPU #[uncomment to use ZeroGPU]
def infer(
    model_id,
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    use_controlnet,
    controlnet_strength,
    controlnet_mode,
    controlnet_image,
    use_ip_adapter,
    ip_adapter_scale,
    ip_adapter_image,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image with the selected model and optional conditioning.

    Args:
        model_id: Model id forwarded to ``get_pipe``.
        prompt: Text prompt.
        negative_prompt: Negative text prompt.
        seed: Seed for the random generator; replaced when ``randomize_seed``
            is truthy.
        randomize_seed: When truthy, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Output width passed to the pipeline.
        height: Output height passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of denoising steps.
        use_controlnet: Enable ControlNet conditioning.
        controlnet_strength: Passed as ``controlnet_conditioning_scale``.
        controlnet_mode: ControlNet mode name (key of ``CONTROLNET_MODE``).
        controlnet_image: Image preprocessed into the ControlNet condition.
        use_ip_adapter: Enable IP-Adapter conditioning.
        ip_adapter_scale: Scale set through ``pipe.set_ip_adapter_scale``.
        ip_adapter_image: Reference image for the IP-Adapter.
        progress: Gradio progress tracker; not read in the body.

    Returns:
        A ``(image, seed)`` tuple: the first generated image and the seed that
        was actually used.
    """
    if randomize_seed:
        # MAX_SEED is not defined in the visible part of this file; it is
        # expected to exist at module level.
        seed = random.randint(0, MAX_SEED)

    generator = torch.Generator().manual_seed(seed)

    # Arguments shared by every pipeline variant. All user settings are
    # forwarded in every mode so that none of them is silently ignored.
    call_kwargs = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "guidance_scale": guidance_scale,
        "num_inference_steps": num_inference_steps,
        "width": width,
        "height": height,
        "generator": generator,
    }

    if use_controlnet:
        # Preprocess before loading the pipeline so that a bad input image
        # fails before the expensive model load.
        call_kwargs["image"] = prepare_controlnet_image(controlnet_image, controlnet_mode)
        call_kwargs["controlnet_conditioning_scale"] = controlnet_strength

    pipe = get_pipe(model_id, use_controlnet, controlnet_mode, use_ip_adapter)

    if use_ip_adapter:
        pipe.set_ip_adapter_scale(ip_adapter_scale)
        call_kwargs["ip_adapter_image"] = ip_adapter_image

    image = pipe(**call_kwargs).images[0]

    return image, seed