import os
import torch
import gradio as gr
from diffusers import DiffusionPipeline
from PIL import Image

# --- Configuration ---
SPACE_TITLE = "🎨 Enhanced Studio Ghibli AI Art Generator (LoRA)"
SPACE_DESCRIPTION = "Upload a portrait or a photo and transform it into a breathtaking Studio Ghibli-style masterpiece using a LoRA for fine-tuned results."
BASE_MODEL_ID = "black-forest-labs/FLUX.1-dev"
LORA_REPO_ID = "strangerzonehf/Flux-Ghibli-Art-LoRA"
TRIGGER_WORD = "Ghibli Art"
STRENGTH = 0.60  # Image-to-image strength; unused in the text-to-image path below (see the img2img sketch after transform_image)
GUIDANCE_SCALE = 7.5  # Increased for better prompt adherence
NUM_INFERENCE_STEPS = 30  # Increased for potentially higher quality
INPUT_IMAGE_SIZE = (512, 512)
PROMPT_PREFIX = ""  # No need for separate prefix as LoRA is targeted
NEGATIVE_PROMPT = "ugly, deformed, blurry, low quality, bad anatomy, bad proportions, disfigured, poorly drawn face, mutation, mutated, extra limbs, extra fingers, body horror, glitchy, tiling"

# --- Device Setup ---
# Attempt to use CUDA if available, otherwise fallback to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"🚀 Using device: {device}")

# Ensure Torch is correctly initialized
try:
    torch.zeros(1).to(device)
    print("✅ Torch initialized successfully on", device)
except Exception as e:
    print(f"⚠️ Torch initialization error: {e}")

# --- Model Loading ---
try:
    # bfloat16 keeps memory usage manageable; FLUX.1-dev is large, so expect slow CPU runs
    pipe = DiffusionPipeline.from_pretrained(BASE_MODEL_ID, torch_dtype=torch.bfloat16)
except ValueError as e:
    if "sentencepiece" in str(e):
        print("⚠️ Error: sentencepiece is not installed. Please install it with: pip install sentencepiece")
    raise

try:
    pipe.load_lora_weights(LORA_REPO_ID)
    print(f"✅ LoRA weights loaded from {LORA_REPO_ID}")
except Exception as e:
    print(f"⚠️ Error loading LoRA weights: {e}")
    print("⚠️ If this mentions the PEFT backend, install it with: pip install peft")
    print("⚠️ Continuing without LoRA; results may vary.")
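
# Optionally fuse the LoRA into the base weights to shave a little latency per
# step. A commented sketch, assuming the PEFT backend is installed and the
# load above succeeded; skip it if you plan to swap LoRAs at runtime:
# pipe.fuse_lora()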

# --- Device Placement & Optimization ---
if device == "cuda":
    try:
        pipe.enable_xformers_memory_efficient_attention()
        print("✅ xFormers enabled!")
    except Exception as e:
        print(f"⚠️ xFormers not available: {e}")
    # enable_model_cpu_offload manages device placement itself, so we skip the
    # explicit pipe.to("cuda") that would otherwise defeat the offloading
    pipe.enable_model_cpu_offload()
else:
    pipe.to(device)
pipe.enable_vae_slicing()
pipe.enable_attention_slicing()
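
# --- Reproducibility (optional) ---
# A minimal sketch, not wired into the UI: pass a seeded torch.Generator to
# the pipeline call for repeatable outputs, e.g.
#   generator = torch.Generator(device="cpu").manual_seed(42)
#   image = pipe(prompt=..., generator=generator, ...).images[0]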

# --- Image Transformation Function ---
def transform_image(input_image):
    if input_image is None:
        return None

    try:
        input_image = input_image.resize(INPUT_IMAGE_SIZE)

        # The text-to-image FluxPipeline does not accept an `image` argument,
        # so the uploaded photo cannot steer generation directly; we can only
        # describe the subject in the prompt. For true image-to-image, see the
        # FluxImg2ImgPipeline sketch below this function.
        prompt = f"{PROMPT_PREFIX} {TRIGGER_WORD}, portrait photo of a person".strip()

        output = pipe(
            prompt=prompt,
            guidance_scale=GUIDANCE_SCALE,
            num_inference_steps=NUM_INFERENCE_STEPS,
            # FLUX.1-dev uses distilled guidance; negative_prompt is honored
            # only by recent diffusers versions (via true CFG) and may be
            # ignored or rejected otherwise
            negative_prompt=NEGATIVE_PROMPT,
        ).images[0]

        return output
    except Exception as e:
        print(f"❌ Error during image transformation: {e}")
        return None
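
# --- Sketch: true image-to-image (not used above) ---
# diffusers also ships FluxImg2ImgPipeline, which does accept `image` and
# `strength` arguments; this is where STRENGTH would come into play. A minimal
# commented sketch, assuming the same base model and LoRA as above (untested):
#
#   from diffusers import FluxImg2ImgPipeline
#   i2i = FluxImg2ImgPipeline.from_pretrained(BASE_MODEL_ID, torch_dtype=torch.bfloat16)
#   i2i.load_lora_weights(LORA_REPO_ID)
#   i2i.to(device)
#   styled = i2i(
#       prompt=f"{TRIGGER_WORD}, portrait photo of a person",
#       image=input_image,                # PIL image, resized to INPUT_IMAGE_SIZE
#       strength=STRENGTH,                # 0.0 keeps the input, 1.0 ignores it
#       guidance_scale=GUIDANCE_SCALE,
#       num_inference_steps=NUM_INFERENCE_STEPS,
#   ).images[0]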

# --- Gradio UI ---
iface = gr.Interface(
    fn=transform_image,
    inputs=gr.Image(type="pil", label="Upload a Portrait/Photo"),
    outputs=gr.Image(type="pil", label="Studio Ghibli-Style Output"),
    title=SPACE_TITLE,
    description=SPACE_DESCRIPTION,
    # `examples` expects raw input values (here, image file paths), not components
    examples=[
        ["examples/portrait1.jpg"],
        ["examples/photo1.jpg"],
        ["examples/landscape1.jpg"],
    ],
)
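
# For slow generations (30 FLUX steps), enabling Gradio's request queue is
# usually worthwhile so concurrent users don't time out; a sketch:
# iface.queue()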

# --- Main Execution ---
if __name__ == "__main__":
    # Create an 'examples' directory if it doesn't exist.
    # NOTE: the Interface above already references these paths, so the sample
    # images should exist before the app is built for the examples to render.
    if not os.path.exists("examples"):
        os.makedirs("examples")
        # You'll need to download or create these example images
        # and place them in the 'examples' folder, e.g.:
        # from urllib.request import urlretrieve
        # urlretrieve("URL_TO_YOUR_EXAMPLE_IMAGE_1", "examples/portrait1.jpg")
        # urlretrieve("URL_TO_YOUR_EXAMPLE_IMAGE_2", "examples/photo1.jpg")
        # urlretrieve("URL_TO_YOUR_EXAMPLE_IMAGE_3", "examples/landscape1.jpg")
        print("ℹ️ Created 'examples' directory. Please add sample images.")

    iface.launch()