Update app.py

app.py CHANGED
@@ -1,18 +1,15 @@
 import gradio as gr
 import numpy as np
 import torch
-from diffusers.utils import load_image
-from diffusers import (
-    StableDiffusionPipeline,
-    StableDiffusionControlNetPipeline,
-    ControlNetModel
-)
+from diffusers.utils import load_image
+from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
 from peft import PeftModel, LoraConfig
 from controlnet_aux import HEDdetector
 from PIL import Image
 import cv2 as cv
 import os
-
+from functools import lru_cache
+from contextlib import contextmanager
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
@@ -23,198 +20,141 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model_id_default = "CompVis/stable-diffusion-v1-4"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-def get_lora_sd_pipeline(
-    ckpt_dir='./lora_logos',
-    base_model_name_or_path=None,
-    dtype=torch.float16,
-    adapter_name="default",
-    controlnet=None
-):
-
-    unet_sub_dir = os.path.join(ckpt_dir, "unet")
-    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
-
-    if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
-        config = LoraConfig.from_pretrained(text_encoder_sub_dir)
-        base_model_name_or_path = config.base_model_name_or_path
-
-    if base_model_name_or_path is None:
-        raise ValueError("Please specify the base model name or path")
-
-    pipe = StableDiffusionControlNetPipeline.from_pretrained(
-        base_model_name_or_path,
-        torch_dtype=dtype,
-        controlnet=controlnet,
-    )
-
-    before_params = pipe.unet.parameters()
-    pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
-    pipe.unet.set_adapter(adapter_name)
-    after_params = pipe.unet.parameters()
-    print("Parameters changed:", any(torch.any(b != a) for b, a in zip(before_params, after_params)))
-
-    if os.path.exists(text_encoder_sub_dir):
-        pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name)
-
-
-
-
-
-
-
-def
-
-
-
-
-
-
-    return
-
-def align_embeddings(prompt_embeds, negative_prompt_embeds):
-    max_length = max(prompt_embeds.shape[1], negative_prompt_embeds.shape[1])
-    return torch.nn.functional.pad(prompt_embeds, (0, 0, 0, max_length - prompt_embeds.shape[1])), \
-           torch.nn.functional.pad(negative_prompt_embeds, (0, 0, 0, max_length - negative_prompt_embeds.shape[1]))
-
-def map_edge_detection(image_path: str) -> Image:
-    source_img = load_image(image_path).convert('RGB')
-    edges = cv.Canny(np.array(source_img), 80, 160)
-    edges = np.repeat(edges[:, :, None], 3, axis=2)
-    final_image = Image.fromarray(edges)
-    return final_image
-
-def map_scribble(image_path: str) -> Image:
-    global hed
-    if not hed:
-        hed = HEDdetector.from_pretrained('lllyasviel/Annotators')
-
-    image = load_image(image_path).convert('RGB')
-    scribble_image = hed(image)
-    image_np = np.array(scribble_image)
-    image_np = cv.medianBlur(image_np, 3)
-    image = cv.convertScaleAbs(image_np, alpha=1.5, beta=0)
-    final_image = Image.fromarray(image)
-    return final_image
-
-
-
-pipe = get_lora_sd_pipeline(
-    ckpt_dir='./lora_logos',
-    base_model_name_or_path=model_id_default,
-    dtype=torch_dtype,
-    controlnet=controlnet
-).to(device)
-
-
-
-def infer(
-    prompt,
-    negative_prompt,
-    width=512,
-    height=512,
-    num_inference_steps=20,
-    model_id='CompVis/stable-diffusion-v1-4',
-    seed=42,
-    guidance_scale=7.0,
-    lora_scale=0.5,
-    cn_enable=False,
-    cn_strength=0.0,
-    cn_mode='edge_detection',
-    cn_image=None,
-    ip_enable=False,
-    ip_scale=0.5,
-    ip_image=None,
-    progress=gr.Progress(track_tqdm=True)
-):
-
-
-
-
-
-
-
-
-
-
-
-
-
-            torch_dtype=torch_dtype
-        )
-        controlnet_changed = True
-    else:
-        cn_strength = 0.0  # force-disable ControlNet
-
-    if model_id != pipe._name_or_path:
-        pipe = StableDiffusionControlNetPipeline.from_pretrained(
-            model_id,
-            torch_dtype=torch_dtype,
-            controlnet=controlnet,
-            controlnet_conditioning_scale=cn_strength,
-        ).to(device)
-    elif (model_id == pipe._name_or_path) and controlnet_changed:
-        pipe = StableDiffusionControlNetPipeline.from_pretrained(
-            model_id,
-            torch_dtype=torch_dtype,
-            controlnet=controlnet,
-            controlnet_conditioning_scale=cn_strength,
-        ).to(device)
-        print(f"LoRA adapter loaded: {pipe.unet.active_adapters}")
-        print(f"LoRA scale applied: {lora_scale}")
-        pipe.fuse_lora(lora_scale=lora_scale)
-    elif (model_id == pipe._name_or_path) and not controlnet_changed:
-        print(f"LoRA adapter loaded: {pipe.unet.active_adapters}")
-        print(f"LoRA scale applied: {lora_scale}")
-        pipe.fuse_lora(lora_scale=lora_scale)
-
-
-
-    prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
-
-
-
-
-
-        'num_inference_steps': num_inference_steps,
-        'width': width,
-        'height': height,
-        'generator': generator,
-    }
-
-    if cn_enable:
-        params['controlnet_conditioning_scale'] = cn_strength
-        if cn_mode == 'edge_detection':
-            control_image = map_edge_detection(cn_image)
-        elif cn_mode == 'scribble':
-            control_image = map_scribble(cn_image)
-        params['image'] = control_image
-
-    if ip_enable:
-        pipe.load_ip_adapter(
-            IP_ADAPTER,
-            subfolder="models",
-            weight_name=IP_ADAPTER_WEIGHT_NAME,
-        )
-
-
-
-
+class PipelineManager:
+    def __init__(self):
+        self.pipe = None
+        self.current_model = None
+        self.controlnet_cache = {}
+        self.hed = None
+
+    @lru_cache(maxsize=2)
+    def get_controlnet(self, model_name: str) -> ControlNetModel:
+        if model_name not in self.controlnet_cache:
+            self.controlnet_cache[model_name] = ControlNetModel.from_pretrained(
+                model_name,
+                cache_dir="./models_cache",
+                torch_dtype=torch_dtype
+            ).to(device)
+        return self.controlnet_cache[model_name]
+
+    def get_hed_detector(self):
+        if self.hed is None:
+            self.hed = HEDdetector.from_pretrained('lllyasviel/Annotators')
+        return self.hed
+
+    def initialize_pipeline(self, model_id, controlnet_model):
+        controlnet = self.get_controlnet(controlnet_model)
+        if not self.pipe or model_id != self.current_model:
+            self.pipe = self.create_pipeline(model_id, controlnet)
+            self.current_model = model_id
+        return self.pipe
+
+    def create_pipeline(self, model_id, controlnet):
+        pipe = StableDiffusionControlNetPipeline.from_pretrained(
+            model_id,
+            torch_dtype=torch_dtype,
+            controlnet=controlnet,
+            cache_dir="./models_cache"
+        ).to(device)
+
+        if os.path.exists('./lora_logos'):
+            pipe = self.load_lora_adapters(pipe)
+
+        return pipe
+
+    def load_lora_adapters(self, pipe):
+        unet_dir = os.path.join('./lora_logos', "unet")
+        text_encoder_dir = os.path.join('./lora_logos', "text_encoder")
+
+        pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_dir, adapter_name="default")
+        if os.path.exists(text_encoder_dir):
+            pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_dir)
+
+        return pipe.to(device)
+
+@contextmanager
+def torch_inference_mode():
+    with torch.inference_mode(), torch.autocast(device.type):
+        yield
+
+def process_embeddings(prompt, negative_prompt, tokenizer, text_encoder):
+    def process_text(text):
+        tokens = tokenizer(text, return_tensors="pt", truncation=False).input_ids
+        chunks = [tokens[:, i:i+77].to(device) for i in range(0, tokens.size(1), 77)]
+        return torch.cat([text_encoder(chunk)[0] for chunk in chunks], dim=1)
+
+    prompt_emb = process_text(prompt)
+    negative_emb = process_text(negative_prompt)
+    max_len = max(prompt_emb.size(1), negative_emb.size(1))
+
+    return (
+        torch.nn.functional.pad(prompt_emb, (0, 0, 0, max_len - prompt_emb.size(1))),
+        torch.nn.functional.pad(negative_emb, (0, 0, 0, max_len - negative_emb.size(1)))
+    )
+
+def process_control_image(image_path: str, processor: str, hed_detector) -> Image:
+    image = load_image(image_path).convert('RGB')
+
+    if processor == 'edge_detection':
+        edges = cv.Canny(np.array(image), 80, 160)
+        return Image.fromarray(np.repeat(edges[:, :, None], 3, axis=2))
+
+    if processor == 'scribble':
+        scribble = hed_detector(image)
+        processed = cv.medianBlur(np.array(scribble), 3)
+        return Image.fromarray(cv.convertScaleAbs(processed, alpha=1.5))
+
+pipeline_mgr = PipelineManager()
+controlnet_models = {
+    "edge_detection": "lllyasviel/sd-controlnet-canny",
+    "scribble": "lllyasviel/sd-controlnet-scribble"
+}
+
+def infer(**kwargs):
+    generator = torch.Generator(device).manual_seed(kwargs['seed'])
+
+    with torch_inference_mode():
+        pipe = pipeline_mgr.initialize_pipeline(
+            kwargs['model_id'],
+            controlnet_models.get(kwargs['cn_mode'], controlnet_models['edge_detection'])
+        )
+
+        if kwargs['cn_enable'] and not kwargs['cn_image']:
+            raise gr.Error("ControlNet enabled but no image provided!")
+
+        prompt_emb, negative_emb = process_embeddings(
+            kwargs['prompt'],
+            kwargs['negative_prompt'],
+            pipe.tokenizer,
+            pipe.text_encoder
+        )
+
+        params = {
+            'prompt_embeds': prompt_emb,
+            'negative_prompt_embeds': negative_emb,
+            'guidance_scale': kwargs['guidance_scale'],
+            'num_inference_steps': kwargs['num_inference_steps'],
+            'width': kwargs['width'],
+            'height': kwargs['height'],
+            'generator': generator
+        }
+
+        if kwargs['cn_enable']:
+            params['image'] = process_control_image(
+                kwargs['cn_image'],
+                kwargs['cn_mode'],
+                pipeline_mgr.get_hed_detector()
+            )
+            params['controlnet_conditioning_scale'] = kwargs['cn_strength']
+
+        if kwargs.get('ip_enable', False):
+            pipe.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=IP_ADAPTER_WEIGHT_NAME)
+            params['ip_adapter_image'] = load_image(kwargs['ip_image']).convert('RGB')
+            pipe.set_ip_adapter_scale(kwargs.get('ip_scale', 0.5))
+
+        pipe.fuse_lora(lora_scale=kwargs.get('lora_scale', 0.5))
+
+        return pipe(**params).images[0]
 
 css = """
 #col-container {
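A few review notes on the new version.

First, `def infer(**kwargs)` accepts keyword arguments only, but Gradio passes the values wired through `inputs=[...]` to a click handler positionally, so this signature fails at call time. A minimal shim (parameter names and order assumed from the old `infer` signature, so adjust to match the actual UI wiring) would be:

    # Hypothetical adapter: Gradio supplies inputs positionally; map them
    # onto the keyword-only infer(). Names/order assumed from the old signature.
    def infer_ui(prompt, negative_prompt, width, height, num_inference_steps,
                 model_id, seed, guidance_scale, lora_scale,
                 cn_enable, cn_strength, cn_mode, cn_image,
                 ip_enable, ip_scale, ip_image,
                 progress=gr.Progress(track_tqdm=True)):
        kwargs = {k: v for k, v in locals().items() if k != 'progress'}
        return infer(**kwargs)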
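Second, `@lru_cache(maxsize=2)` on `get_controlnet` is redundant with the manual `controlnet_cache` dict, and `lru_cache` on a bound method also keeps a strong reference to `self` and makes the instance part of the cache key. Keeping only the dict is simpler (sketch):

    # Sketch: the dict already memoizes per model name; drop the decorator.
    def get_controlnet(self, model_name: str) -> ControlNetModel:
        if model_name not in self.controlnet_cache:
            self.controlnet_cache[model_name] = ControlNetModel.from_pretrained(
                model_name,
                cache_dir="./models_cache",
                torch_dtype=torch_dtype,
            ).to(device)
        return self.controlnet_cache[model_name]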
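Third, `process_control_image` falls through and returns `None` when `processor` matches neither branch, which only surfaces later as an opaque pipeline error. An explicit guard at the end of the function keeps the failure local (sketch):

    # Sketch: fail fast instead of implicitly returning None.
    raise ValueError(f"Unknown control image processor: {processor!r}")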
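Fourth, `pipe.fuse_lora(...)` runs on every `infer` call, and because the pipeline object is cached across calls, repeated fusions compound the LoRA weights. Fusing for a single generation and restoring afterwards avoids that; diffusers pipelines expose `unfuse_lora` for this (sketch):

    # Sketch: fuse for this generation only, then restore the base weights.
    pipe.fuse_lora(lora_scale=kwargs.get('lora_scale', 0.5))
    try:
        return pipe(**params).images[0]
    finally:
        pipe.unfuse_lora()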
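Finally, for context: `process_embeddings` works around CLIP's 77-token limit by tokenizing without truncation, encoding the ids in 77-token chunks, concatenating the chunk embeddings, and padding the prompt and negative embeddings to a common length. A usage sketch (prompt text illustrative, pipeline assumed loaded):

    # Sketch: precompute embeddings for a long prompt, then call the pipeline.
    emb, neg = process_embeddings(
        "a flat minimal fox logo, vector style, centered",
        "blurry, low quality",
        pipe.tokenizer,
        pipe.text_encoder,
    )
    image = pipe(prompt_embeds=emb, negative_prompt_embeds=neg,
                 num_inference_steps=20).images[0]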