Update app.py
Browse files
app.py
CHANGED
@@ -1,60 +1,11 @@
|
|
1 |
-
|
2 |
-
# !pip install huggingface_hub gradio pillow
|
3 |
-
|
4 |
from huggingface_hub import InferenceClient
|
5 |
import gradio as gr
|
6 |
import random
|
7 |
-
import json
|
8 |
import logging
|
9 |
import sys
|
10 |
-
import os
|
11 |
-
|
12 |
-
# Set up logging
|
13 |
-
logging.basicConfig(
|
14 |
-
level=logging.INFO,
|
15 |
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
16 |
-
handlers=[logging.StreamHandler(sys.stdout)]
|
17 |
-
)
|
18 |
-
logger = logging.getLogger("visual_studio")
|
19 |
|
20 |
-
#
|
21 |
-
def setup_client(api_key, provider=None):
|
22 |
-
try:
|
23 |
-
if provider:
|
24 |
-
client = InferenceClient(provider=provider, api_key=api_key)
|
25 |
-
logger.info(f"{provider} client initialized successfully")
|
26 |
-
else:
|
27 |
-
client = InferenceClient(api_key=api_key)
|
28 |
-
logger.info("Hugging Face client initialized successfully")
|
29 |
-
return client
|
30 |
-
except Exception as e:
|
31 |
-
logger.error(f"Error initializing client: {str(e)}")
|
32 |
-
return None
|
33 |
-
|
34 |
-
# Initialize clients
|
35 |
-
try:
|
36 |
-
# Replace with your actual HF API key
|
37 |
-
hf_api_key = os.getenv("HF_API_KEY1")
|
38 |
-
hf_client = setup_client(hf_api_key)
|
39 |
-
logger.info("Hugging Face client created successfully")
|
40 |
-
|
41 |
-
# Set up Llama client if API key is provided
|
42 |
-
llama_api_key = os.getenv("HF_API_KEY2") # Replace with actual key if available
|
43 |
-
try:
|
44 |
-
llama_client = setup_client(llama_api_key, "sambanova")
|
45 |
-
use_llama = True
|
46 |
-
logger.info("Llama client created successfully")
|
47 |
-
except Exception as e:
|
48 |
-
logger.warning(f"Llama client not available: {str(e)}. Will use fallback enhancement.")
|
49 |
-
llama_client = None
|
50 |
-
use_llama = False
|
51 |
-
except Exception as e:
|
52 |
-
logger.error(f"Failed to create Hugging Face client: {str(e)}")
|
53 |
-
hf_client = None
|
54 |
-
llama_client = None
|
55 |
-
use_llama = False
|
56 |
-
|
57 |
-
# Only Hugging Face hosted models
|
58 |
IMAGE_MODELS = {
|
59 |
"stabilityai/stable-diffusion-xl-base-1.0": "SDXL (Best Quality)",
|
60 |
"runwayml/stable-diffusion-v1-5": "Stable Diffusion 1.5 (Balanced)",
|
@@ -63,7 +14,6 @@ IMAGE_MODELS = {
|
|
63 |
"dreamlike-art/dreamlike-diffusion-1.0": "Dreamlike Diffusion (Artistic)"
|
64 |
}
|
65 |
|
66 |
-
# Creation types
|
67 |
CREATION_TYPES = {
|
68 |
"Realistic Photo": "Create a photorealistic image with natural details and lighting",
|
69 |
"Digital Art": "Create colorful digital artwork with clean lines and vibrant colors",
|
@@ -77,7 +27,6 @@ CREATION_TYPES = {
|
|
77 |
"Pixel Art": "Create retro-style pixel art with limited color palette"
|
78 |
}
|
79 |
|
80 |
-
# Art styles with detailed descriptions for better Llama prompt enhancement
|
81 |
ART_STYLES = {
|
82 |
"Photorealistic": "detailed realistic style that resembles a photograph with accurate lighting and textures",
|
83 |
"Impressionist": "soft brushstrokes that capture light and atmosphere over precise details, like Monet",
|
@@ -87,140 +36,73 @@ ART_STYLES = {
|
|
87 |
"Abstract": "non-representational style using shapes, colors, and forms to express ideas",
|
88 |
"Cubist": "geometric shapes and multiple perspectives shown simultaneously, like Picasso",
|
89 |
"Art Nouveau": "ornate, flowing lines inspired by natural forms with decorative elegance",
|
90 |
-
"Gothic": "
|
91 |
"Cyberpunk": "futuristic dystopian style with neon colors, technology, and urban decay",
|
92 |
-
"Steampunk": "
|
93 |
"Retro/Vintage": "nostalgic style reminiscent of past decades with period-appropriate elements",
|
94 |
-
"Art Deco": "geometric patterns, bold colors, and luxurious materials
|
95 |
-
"Baroque": "
|
96 |
"Ukiyo-e": "traditional Japanese woodblock print style with flat areas of color and strong outlines",
|
97 |
-
"Comic Book": "
|
98 |
-
"Psychedelic": "vibrant
|
99 |
-
"Vaporwave": "
|
100 |
-
"Studio Ghibli": "
|
101 |
-
"Hyperrealism": "
|
102 |
}
|
103 |
|
104 |
-
#
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
logger.info(f"Enhancing prompt with Llama 4 for creation type: {creation_type}, art style: {art_style}")
|
112 |
-
|
113 |
-
# Enhanced Llama 4 system prompt
|
114 |
-
system_prompt = """You are a world-class prompt engineer who specializes in creating detailed, effective prompts for text-to-image AI models.
|
115 |
-
|
116 |
-
Your task is to transform a user's simple description into a comprehensive, detailed image generation prompt that will create stunning visuals. Consider all the provided elements (description, creation type, art style, mood) and combine them into a cohesive, detailed prompt.
|
117 |
-
|
118 |
-
MOST IMPORTANTLY - ADD LOGICAL DETAILS:
|
119 |
-
- Analyze what the user wants and add logical details that would make the scene realistic or coherent
|
120 |
-
- If describing something fantastical (e.g., "flying cat"), add logical details about how this could work (e.g., "a cat with majestic feathered wings spread wide")
|
121 |
-
- Think about environment, lighting, perspective, time of day, weather, and other contextual elements
|
122 |
-
- Create a vivid, imaginable scene with spatial relationships clearly defined
|
123 |
-
|
124 |
-
PROMPT STRUCTURE GUIDELINES:
|
125 |
-
1. Start with the core subject and its primary characteristics
|
126 |
-
2. Add environment and setting details
|
127 |
-
3. Describe lighting, atmosphere, and mood
|
128 |
-
4. Include specific visual style and artistic technique references
|
129 |
-
5. Add technical quality terms (8K, detailed, masterful, etc.)
|
130 |
-
|
131 |
-
FORMAT YOUR RESPONSE AS A SINGLE PARAGRAPH with no additional comments, explanations, or bullet points. Use natural language without awkward comma separations. Aim for 75-150 words.
|
132 |
-
|
133 |
-
AVOID:
|
134 |
-
- Do not include quotation marks in your response
|
135 |
-
- Do not preface with "here's a prompt" or similar text
|
136 |
-
- Do not use placeholders
|
137 |
-
- Do not add negative prompts
|
138 |
-
- Do not write in list format or use bullet points
|
139 |
-
|
140 |
-
Respond only with the enhanced prompt and nothing else."""
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
|
|
|
|
153 |
|
154 |
-
|
|
|
|
|
|
|
155 |
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
logger.info(f"Llama 4 enhanced prompt: {enhanced[:100]}...")
|
167 |
-
return enhanced if enhanced else user_input
|
168 |
-
except Exception as e:
|
169 |
-
logger.error(f"Error during Llama enhancement: {str(e)}")
|
170 |
-
return enhance_prompt_fallback(user_input, creation_type, art_style, mood)
|
171 |
-
except Exception as e:
|
172 |
-
logger.error(f"Error in Llama enhancement: {str(e)}")
|
173 |
-
return enhance_prompt_fallback(user_input, creation_type, art_style, mood)
|
174 |
|
175 |
-
# Fallback prompt enhancement
|
176 |
def enhance_prompt_fallback(user_input, creation_type, art_style, mood):
|
177 |
logger.info(f"Using fallback enhancement for: {user_input[:50]}...")
|
178 |
-
|
179 |
-
# Quality terms by creation type
|
180 |
quality_terms = {
|
181 |
-
"Realistic Photo": [
|
182 |
-
|
183 |
-
|
184 |
-
],
|
185 |
-
"
|
186 |
-
|
187 |
-
|
188 |
-
],
|
189 |
-
"
|
190 |
-
|
191 |
-
"epic", "otherworldly", "imaginative scene"
|
192 |
-
],
|
193 |
-
"Concept Art": [
|
194 |
-
"professional concept art", "detailed design", "conceptual illustration",
|
195 |
-
"industry standard", "visual development", "production artwork"
|
196 |
-
],
|
197 |
-
"Anime/Manga": [
|
198 |
-
"anime style", "manga illustration", "cel shaded",
|
199 |
-
"Japanese animation", "2D character art", "anime aesthetic"
|
200 |
-
],
|
201 |
-
"Oil Painting": [
|
202 |
-
"oil on canvas", "textured brushwork", "rich colors",
|
203 |
-
"traditional painting", "artistic brushstrokes", "gallery quality"
|
204 |
-
],
|
205 |
-
"Watercolor": [
|
206 |
-
"watercolor painting", "soft color bleeding", "delicate washes",
|
207 |
-
"transparent layers", "loose brushwork", "gentle transitions"
|
208 |
-
],
|
209 |
-
"Sketch": [
|
210 |
-
"detailed sketch", "pencil drawing", "line art",
|
211 |
-
"hand-drawn", "fine details", "shading techniques"
|
212 |
-
],
|
213 |
-
"3D Rendering": [
|
214 |
-
"3D render", "volumetric lighting", "ray tracing",
|
215 |
-
"3D modeling", "realistic textures", "computer graphics"
|
216 |
-
],
|
217 |
-
"Pixel Art": [
|
218 |
-
"pixel art", "8-bit style", "retro game aesthetic",
|
219 |
-
"limited color palette", "pixelated", "nostalgic game art"
|
220 |
-
]
|
221 |
}
|
222 |
-
|
223 |
-
# Style modifiers for different art styles
|
224 |
style_modifiers = {
|
225 |
"Photorealistic": "highly detailed photorealistic style with perfect lighting",
|
226 |
"Impressionist": "impressionist style with visible brushstrokes capturing light and atmosphere",
|
@@ -243,8 +125,6 @@ def enhance_prompt_fallback(user_input, creation_type, art_style, mood):
|
|
243 |
"Studio Ghibli": "Studio Ghibli anime style with whimsical detailed environments",
|
244 |
"Hyperrealism": "hyperrealistic style with extreme detail beyond photography"
|
245 |
}
|
246 |
-
|
247 |
-
# Mood modifiers for different moods
|
248 |
mood_modifiers = {
|
249 |
"Happy": "bright cheerful atmosphere with warm colors",
|
250 |
"Sad": "melancholic atmosphere with muted colors",
|
@@ -257,184 +137,209 @@ def enhance_prompt_fallback(user_input, creation_type, art_style, mood):
|
|
257 |
"Romantic": "soft romantic atmosphere with dreamy lighting",
|
258 |
"Epic": "grand epic atmosphere with dramatic scale"
|
259 |
}
|
260 |
-
|
261 |
-
|
262 |
-
type_terms = quality_terms.get(creation_type, [
|
263 |
-
"high quality", "detailed", "professional", "masterful"
|
264 |
-
])
|
265 |
-
|
266 |
-
# Common quality terms
|
267 |
-
common_terms = [
|
268 |
-
"8K resolution", "highly detailed", "professional",
|
269 |
-
"trending on artstation", "masterpiece"
|
270 |
-
]
|
271 |
-
|
272 |
-
# Get style modifier
|
273 |
style_modifier = style_modifiers.get(art_style, "detailed style")
|
274 |
-
|
275 |
-
# Get mood modifier
|
276 |
mood_modifier = mood_modifiers.get(mood, "atmospheric")
|
277 |
-
|
278 |
-
# Basic prompt structure
|
279 |
-
prompt_parts = [
|
280 |
-
user_input,
|
281 |
-
style_modifier,
|
282 |
-
mood_modifier
|
283 |
-
]
|
284 |
-
|
285 |
-
# Add randomly selected quality terms
|
286 |
selected_type_terms = random.sample(type_terms, min(3, len(type_terms)))
|
287 |
selected_common_terms = random.sample(common_terms, min(2, len(common_terms)))
|
288 |
-
|
289 |
-
# Combine terms
|
290 |
quality_description = ", ".join(selected_type_terms + selected_common_terms)
|
291 |
-
|
292 |
-
# Final enhanced prompt
|
293 |
enhanced_prompt = f"{', '.join(prompt_parts)}, {quality_description}"
|
294 |
-
|
295 |
logger.info(f"Fallback enhanced prompt: {enhanced_prompt[:100]}...")
|
296 |
return enhanced_prompt
|
297 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
# Generate image function
|
299 |
def generate_image(description, creation_type, art_style, mood, model_name):
|
300 |
try:
|
301 |
logger.info(f"Generating image with model: {model_name}")
|
302 |
-
|
303 |
-
# Use Llama 4 for prompt enhancement with all inputs
|
304 |
enhanced_prompt = enhance_prompt_with_llama(description, creation_type, art_style, mood)
|
305 |
-
|
306 |
-
# Check if client is available
|
307 |
if hf_client is None:
|
308 |
logger.error("Hugging Face client not available")
|
309 |
return None, "Error: Hugging Face client not available", enhanced_prompt
|
310 |
-
|
311 |
-
# Generate image
|
312 |
logger.info(f"Sending request to model {model_name} with prompt: {enhanced_prompt[:100]}...")
|
313 |
image = hf_client.text_to_image(
|
314 |
prompt=enhanced_prompt,
|
315 |
model=model_name,
|
316 |
negative_prompt="low quality, blurry, distorted, deformed, disfigured, bad anatomy, watermark, signature, text"
|
317 |
)
|
318 |
-
|
319 |
logger.info("Image generated successfully")
|
320 |
-
|
321 |
-
# Analysis information
|
322 |
-
analysis = f"Image generated using model: {model_name}\n"
|
323 |
if use_llama:
|
324 |
-
analysis += "Prompt enhanced with Llama 4"
|
325 |
else:
|
326 |
analysis += "Prompt enhanced with fallback method"
|
327 |
-
|
328 |
return image, analysis, enhanced_prompt
|
329 |
except Exception as e:
|
330 |
logger.error(f"Error generating image: {str(e)}")
|
331 |
return None, f"Error generating image: {str(e)}", enhanced_prompt
|
332 |
|
333 |
-
#
|
334 |
-
with gr.Blocks(title="
|
335 |
-
|
336 |
-
gr.
|
337 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
with gr.Row():
|
339 |
with gr.Column():
|
340 |
-
# Core
|
341 |
description_input = gr.Textbox(
|
342 |
label="Describe what you want to see",
|
343 |
placeholder="Be detailed and specific about what you want in the image...",
|
344 |
lines=4
|
345 |
)
|
346 |
-
|
347 |
with gr.Row():
|
348 |
creation_type = gr.Dropdown(
|
349 |
choices=list(CREATION_TYPES.keys()),
|
350 |
value="Digital Art",
|
351 |
label="Creation Type"
|
352 |
)
|
353 |
-
|
354 |
model_selector = gr.Dropdown(
|
355 |
choices=list(IMAGE_MODELS.keys()),
|
356 |
value="stabilityai/stable-diffusion-xl-base-1.0",
|
357 |
label="Image Model"
|
358 |
)
|
359 |
-
|
360 |
with gr.Row():
|
361 |
art_style = gr.Dropdown(
|
362 |
choices=list(ART_STYLES.keys()),
|
363 |
value="Photorealistic",
|
364 |
label="Art Style"
|
365 |
)
|
366 |
-
|
367 |
mood_dropdown = gr.Dropdown(
|
368 |
-
choices=["Happy", "Sad", "Mysterious", "Peaceful", "Tense",
|
369 |
"Whimsical", "Dark", "Energetic", "Romantic", "Epic"],
|
370 |
value="Peaceful",
|
371 |
label="Mood"
|
372 |
)
|
373 |
-
|
374 |
-
# Generate button
|
375 |
generate_button = gr.Button("✨ Generate Image", variant="primary", size="lg")
|
376 |
-
|
377 |
-
# Display model names in a more user-friendly way
|
378 |
def format_model_name(model_key):
|
379 |
return IMAGE_MODELS.get(model_key, model_key)
|
380 |
-
|
381 |
model_label = gr.HTML(value="")
|
382 |
model_selector.change(
|
383 |
-
fn=lambda x: f"<p>Selected model:
|
384 |
inputs=model_selector,
|
385 |
outputs=model_label
|
386 |
)
|
387 |
-
|
388 |
with gr.Column():
|
389 |
-
# Output areas
|
390 |
image_output = gr.Image(label="Generated Image")
|
391 |
-
|
392 |
with gr.Accordion("Enhanced Prompt", open=False):
|
393 |
prompt_output = gr.Textbox(label="AI-Enhanced Prompt Used", lines=6)
|
394 |
-
|
395 |
-
# Connect generate button
|
396 |
generate_button.click(
|
397 |
fn=generate_image,
|
398 |
inputs=[description_input, creation_type, art_style, mood_dropdown, model_selector],
|
399 |
outputs=[image_output, model_label, prompt_output]
|
400 |
)
|
401 |
-
|
402 |
-
# Tips
|
403 |
-
with gr.Accordion("Tips for better results", open=True):
|
404 |
-
gr.Markdown("""
|
405 |
-
### 💡 Tips for better results:
|
406 |
-
|
407 |
-
- **Be specific** about what you want to see - include details about subjects, actions, setting
|
408 |
-
- **Mention colors, textures, lighting** if they're important to your vision
|
409 |
-
- **Try different art styles** to dramatically change the look and feel
|
410 |
-
- **The mood selection** influences the overall atmosphere and color palette
|
411 |
-
- **SDXL model** generally produces the highest quality images but takes longer
|
412 |
-
|
413 |
-
#### Examples of good descriptions:
|
414 |
-
|
415 |
-
- *"A serene lake at sunset with mountains in the background and a small wooden boat floating nearby"*
|
416 |
-
- *"A futuristic cityscape with flying cars, neon lights, and tall skyscrapers under a night sky with two moons"*
|
417 |
-
- *"A close-up portrait of an elderly craftsman with weathered hands working on an intricate wooden carving in his workshop"*
|
418 |
-
""")
|
419 |
|
420 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
421 |
with gr.Accordion("Troubleshooting", open=False):
|
422 |
-
gr.Markdown(
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
""")
|
438 |
|
439 |
-
# Launch the interface
|
440 |
-
interface.launch()
|
|
|
1 |
+
import os
|
|
|
|
|
2 |
from huggingface_hub import InferenceClient
|
3 |
import gradio as gr
|
4 |
import random
|
|
|
5 |
import logging
|
6 |
import sys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
+
# Define your dictionaries for image models, creation types, and art styles
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
IMAGE_MODELS = {
|
10 |
"stabilityai/stable-diffusion-xl-base-1.0": "SDXL (Best Quality)",
|
11 |
"runwayml/stable-diffusion-v1-5": "Stable Diffusion 1.5 (Balanced)",
|
|
|
14 |
"dreamlike-art/dreamlike-diffusion-1.0": "Dreamlike Diffusion (Artistic)"
|
15 |
}
|
16 |
|
|
|
17 |
CREATION_TYPES = {
|
18 |
"Realistic Photo": "Create a photorealistic image with natural details and lighting",
|
19 |
"Digital Art": "Create colorful digital artwork with clean lines and vibrant colors",
|
|
|
27 |
"Pixel Art": "Create retro-style pixel art with limited color palette"
|
28 |
}
|
29 |
|
|
|
30 |
ART_STYLES = {
|
31 |
"Photorealistic": "detailed realistic style that resembles a photograph with accurate lighting and textures",
|
32 |
"Impressionist": "soft brushstrokes that capture light and atmosphere over precise details, like Monet",
|
|
|
36 |
"Abstract": "non-representational style using shapes, colors, and forms to express ideas",
|
37 |
"Cubist": "geometric shapes and multiple perspectives shown simultaneously, like Picasso",
|
38 |
"Art Nouveau": "ornate, flowing lines inspired by natural forms with decorative elegance",
|
39 |
+
"Gothic": "gothic style with dark atmosphere and dramatic elements",
|
40 |
"Cyberpunk": "futuristic dystopian style with neon colors, technology, and urban decay",
|
41 |
+
"Steampunk": "steampunk style with Victorian aesthetics and brass machinery",
|
42 |
"Retro/Vintage": "nostalgic style reminiscent of past decades with period-appropriate elements",
|
43 |
+
"Art Deco": "art deco style with geometric patterns, bold colors, and luxurious materials",
|
44 |
+
"Baroque": "baroque style with dramatic lighting and rich ornamentation",
|
45 |
"Ukiyo-e": "traditional Japanese woodblock print style with flat areas of color and strong outlines",
|
46 |
+
"Comic Book": "comic book style with bold outlines and vibrant colors",
|
47 |
+
"Psychedelic": "psychedelic style with vibrant swirling colors and abstract patterns",
|
48 |
+
"Vaporwave": "vaporwave aesthetic with glitch art and 80s/90s nostalgia",
|
49 |
+
"Studio Ghibli": "Studio Ghibli anime style with whimsical detailed environments",
|
50 |
+
"Hyperrealism": "hyperrealistic style with extreme detail beyond photography"
|
51 |
}
|
52 |
|
53 |
+
# Set up logging
|
54 |
+
# Configure root logging once at import time: records at INFO and above are
# written to stdout, each stamped with time, logger name, and level.
logging.basicConfig(
    stream=sys.stdout,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-wide logger shared by every function in this file.
logger = logging.getLogger("nostalgia_nexus")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
+
# Setup client function
|
62 |
+
def setup_client(api_key, provider=None):
    """Build an ``InferenceClient``, returning ``None`` if construction fails.

    Args:
        api_key: Value forwarded to ``InferenceClient`` as ``api_key``.
        provider: Optional provider name. When truthy it is forwarded as
            ``provider``; otherwise the argument is omitted entirely.

    Returns:
        The new client, or ``None`` when any exception was raised. The
        error is logged here and never propagated to the caller.
    """
    try:
        client_kwargs = {"api_key": api_key}
        client_label = "Hugging Face"
        if provider:
            # A named provider changes both the constructor call and the
            # name shown in the success message.
            client_kwargs["provider"] = provider
            client_label = provider
        client = InferenceClient(**client_kwargs)
        logger.info(f"{client_label} client initialized successfully")
    except Exception as err:
        logger.error(f"Error initializing client: {str(err)}")
        return None
    return client
|
74 |
|
75 |
+
# Initialize clients
|
76 |
+
# Keys are read from the environment only. A missing key stays None rather
# than being replaced by a placeholder string, because a placeholder would be
# sent to the service as if it were a real token.
hf_api_key = os.getenv("HF_API_KEY1")
if not hf_api_key:
    logger.warning("HF_API_KEY1 is not set; image requests may be rejected or rate limited")

# setup_client logs its own errors and returns None on failure, so success is
# detected by checking the result instead of catching an exception.
hf_client = setup_client(hf_api_key)
if hf_client is not None:
    logger.info("Hugging Face client created successfully")
else:
    logger.error("Failed to create Hugging Face client")

# (Optional) Llama client used for prompt enhancement. Without a key, or if
# the client cannot be built, prompt enhancement uses the fallback method.
llama_api_key = os.getenv("HF_API_KEY2")
llama_client = setup_client(llama_api_key, "sambanova") if llama_api_key else None

# use_llama must agree with llama_client: it is True only when a client exists.
use_llama = llama_client is not None
if use_llama:
    logger.info("Llama client created successfully")
else:
    logger.warning("Llama client not available. Will use fallback enhancement.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
+
# Fallback prompt enhancement function
|
92 |
def enhance_prompt_fallback(user_input, creation_type, art_style, mood):
|
93 |
logger.info(f"Using fallback enhancement for: {user_input[:50]}...")
|
|
|
|
|
94 |
quality_terms = {
|
95 |
+
"Realistic Photo": ["photorealistic", "high resolution", "detailed", "natural lighting", "sharp focus", "professional photography"],
|
96 |
+
"Digital Art": ["vibrant colors", "clean lines", "digital illustration", "polished", "professional digital art", "detailed rendering"],
|
97 |
+
"Fantasy Illustration": ["magical atmosphere", "fantasy art", "detailed illustration", "epic", "otherworldly", "imaginative scene"],
|
98 |
+
"Concept Art": ["professional concept art", "detailed design", "conceptual illustration", "industry standard", "visual development", "production artwork"],
|
99 |
+
"Anime/Manga": ["anime style", "manga illustration", "cel shaded", "Japanese animation", "2D character art", "anime aesthetic"],
|
100 |
+
"Oil Painting": ["oil on canvas", "textured brushwork", "rich colors", "traditional painting", "artistic brushstrokes", "gallery quality"],
|
101 |
+
"Watercolor": ["watercolor painting", "soft color bleeding", "delicate washes", "transparent layers", "loose brushwork", "gentle transitions"],
|
102 |
+
"Sketch": ["detailed sketch", "pencil drawing", "line art", "hand-drawn", "fine details", "shading techniques"],
|
103 |
+
"3D Rendering": ["3D render", "volumetric lighting", "ray tracing", "3D modeling", "realistic textures", "computer graphics"],
|
104 |
+
"Pixel Art": ["pixel art", "8-bit style", "retro game aesthetic", "limited color palette", "pixelated", "nostalgic game art"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
}
|
|
|
|
|
106 |
style_modifiers = {
|
107 |
"Photorealistic": "highly detailed photorealistic style with perfect lighting",
|
108 |
"Impressionist": "impressionist style with visible brushstrokes capturing light and atmosphere",
|
|
|
125 |
"Studio Ghibli": "Studio Ghibli anime style with whimsical detailed environments",
|
126 |
"Hyperrealism": "hyperrealistic style with extreme detail beyond photography"
|
127 |
}
|
|
|
|
|
128 |
mood_modifiers = {
|
129 |
"Happy": "bright cheerful atmosphere with warm colors",
|
130 |
"Sad": "melancholic atmosphere with muted colors",
|
|
|
137 |
"Romantic": "soft romantic atmosphere with dreamy lighting",
|
138 |
"Epic": "grand epic atmosphere with dramatic scale"
|
139 |
}
|
140 |
+
type_terms = quality_terms.get(creation_type, ["high quality", "detailed", "professional", "masterful"])
|
141 |
+
common_terms = ["8K resolution", "highly detailed", "professional", "trending on artstation", "masterpiece"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
style_modifier = style_modifiers.get(art_style, "detailed style")
|
|
|
|
|
143 |
mood_modifier = mood_modifiers.get(mood, "atmospheric")
|
144 |
+
prompt_parts = [user_input, style_modifier, mood_modifier]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
selected_type_terms = random.sample(type_terms, min(3, len(type_terms)))
|
146 |
selected_common_terms = random.sample(common_terms, min(2, len(common_terms)))
|
|
|
|
|
147 |
quality_description = ", ".join(selected_type_terms + selected_common_terms)
|
|
|
|
|
148 |
enhanced_prompt = f"{', '.join(prompt_parts)}, {quality_description}"
|
|
|
149 |
logger.info(f"Fallback enhanced prompt: {enhanced_prompt[:100]}...")
|
150 |
return enhanced_prompt
|
151 |
|
152 |
+
# Llama prompt enhancement function
|
153 |
+
def enhance_prompt_with_llama(user_input, creation_type, art_style, mood):
    """Expand a short description into a detailed image prompt via Llama.

    Sends the description together with the selected creation type, art
    style, and mood to the chat-completions endpoint of ``llama_client``.
    Falls back to ``enhance_prompt_fallback`` whenever Llama is disabled,
    the request raises, or the reply is empty.

    Args:
        user_input: The user's free-text description.
        creation_type: Key into ``CREATION_TYPES``; unknown keys get a
            generic description.
        art_style: Key into ``ART_STYLES``; unknown keys get a generic
            description.
        mood: Mood name, passed to the model verbatim.

    Returns:
        The enhanced prompt as a string with surrounding whitespace removed,
        or the fallback-enhanced prompt.
    """
    if not use_llama or llama_client is None:
        logger.warning("Llama enhancement not available, using fallback")
        return enhance_prompt_fallback(user_input, creation_type, art_style, mood)

    logger.info(f"Enhancing prompt with Llama 4 for creation type: {creation_type}, art style: {art_style}")
    system_prompt = """You are a world-class prompt engineer who specializes in creating detailed, effective prompts for text-to-image AI models.

Your task is to transform a user's simple description into a comprehensive, detailed image generation prompt that will create stunning visuals. Consider all the provided elements (description, creation type, art style, mood) and combine them into a cohesive, detailed prompt.

MOST IMPORTANTLY - ADD LOGICAL DETAILS:
- Analyze what the user wants and add logical details that would make the scene realistic or coherent
- If describing something fantastical (e.g., "flying cat"), add logical details about how this could work (e.g., "a cat with majestic feathered wings spread wide")
- Think about environment, lighting, perspective, time of day, weather, and other contextual elements
- Create a vivid, imaginable scene with spatial relationships clearly defined

PROMPT STRUCTURE GUIDELINES:
1. Start with the core subject and its primary characteristics
2. Add environment and setting details
3. Describe lighting, atmosphere, and mood
4. Include specific visual style and artistic technique references
5. Add technical quality terms (8K, detailed, masterful, etc.)

FORMAT YOUR RESPONSE AS A SINGLE PARAGRAPH with no additional comments, explanations, or bullet points. Use natural language without awkward comma separations. Aim for 75-150 words.

AVOID:
- Do not include quotation marks in your response
- Do not preface with "here's a prompt" or similar text
- Do not use placeholders
- Do not add negative prompts
- Do not write in list format or use bullet points

Respond only with the enhanced prompt and nothing else."""
    creation_description = CREATION_TYPES.get(creation_type, "Create a detailed image")
    style_description = ART_STYLES.get(art_style, "with detailed and professional quality")
    user_prompt = f"Description: {user_input}\nCreation Type: {creation_type} - {creation_description}\nArt Style: {art_style} - {style_description}\nMood: {mood}\n\nPlease create a comprehensive, detailed image generation prompt that combines all these elements."

    try:
        completion = llama_client.chat.completions.create(
            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=500,
        )
        # The content may be None; normalise to a stripped string so the
        # emptiness check below covers None, "" and whitespace-only replies.
        enhanced = (completion.choices[0].message.content or "").strip()
    except Exception as e:
        logger.error(f"Error during Llama enhancement: {str(e)}")
        return enhance_prompt_fallback(user_input, creation_type, art_style, mood)

    if not enhanced:
        # An empty reply gets the same treatment as a failed request, so the
        # image model never receives the bare, un-enhanced description.
        logger.warning("Llama returned an empty prompt, using fallback")
        return enhance_prompt_fallback(user_input, creation_type, art_style, mood)

    logger.info(f"Llama 4 enhanced prompt: {enhanced[:100]}...")
    return enhanced
|
203 |
+
|
204 |
# Generate image function
|
205 |
def generate_image(description, creation_type, art_style, mood, model_name):
    """Enhance the prompt, request an image, and build the status message.

    Args:
        description: The user's free-text description.
        creation_type: Selected creation type, passed to the prompt enhancer.
        art_style: Selected art style, passed to the prompt enhancer.
        mood: Selected mood, passed to the prompt enhancer.
        model_name: Key of the image model to call; its display name is
            looked up in ``IMAGE_MODELS``.

    Returns:
        A 3-tuple ``(image, status_html, enhanced_prompt)``. ``image`` is
        ``None`` on failure. ``status_html`` is an HTML fragment, because the
        UI wires this output to a ``gr.HTML`` component.
    """
    import html  # local import: only needed to escape error text below

    # Bound before the try block so the except branch can always return it,
    # even when the failure happens before prompt enhancement finishes.
    enhanced_prompt = description or ""
    try:
        logger.info(f"Generating image with model: {model_name}")
        enhanced_prompt = enhance_prompt_with_llama(description, creation_type, art_style, mood)

        if hf_client is None:
            logger.error("Hugging Face client not available")
            return None, "Error: Hugging Face client not available", enhanced_prompt

        logger.info(f"Sending request to model {model_name} with prompt: {enhanced_prompt[:100]}...")
        image = hf_client.text_to_image(
            prompt=enhanced_prompt,
            model=model_name,
            negative_prompt="low quality, blurry, distorted, deformed, disfigured, bad anatomy, watermark, signature, text",
        )
        logger.info("Image generated successfully")

        # The status is rendered as HTML, where "\n" collapses to a space;
        # <br> is what actually puts the two facts on separate lines.
        analysis = f"Image generated using model: <b style='color:#FF6F61;'>{IMAGE_MODELS.get(model_name, model_name)}</b><br>"
        # NOTE(review): use_llama only says a Llama client exists; the
        # enhancer may still have used the fallback for this request.
        if use_llama:
            analysis += "Prompt enhanced with <b style='color:#FF6F61;'>Llama 4</b>"
        else:
            analysis += "Prompt enhanced with fallback method"
        return image, analysis, enhanced_prompt
    except Exception as e:
        logger.error(f"Error generating image: {str(e)}")
        # Exception text is arbitrary; escape it so it cannot be
        # interpreted as markup by the HTML status component.
        return None, f"Error generating image: {html.escape(str(e))}", enhanced_prompt
|
228 |
|
229 |
+
# Build Gradio Interface with enhanced UI and highlighting names
|
230 |
+
# Build the Gradio interface: inputs on the left, outputs on the right,
# followed by collapsible help sections.
with gr.Blocks(title="Nostalgia Nexus") as interface:
    # Custom CSS for the page background, font, and header classes.
    gr.HTML(
        """
<style>
body { background-color: #fdfdfd; }
.gradio-container { font-family: 'Arial', sans-serif; }
.header-title { text-align: center; color: #FF6F61; font-size: 2.5em; font-weight: bold; }
.header-subtitle { text-align: center; color: #6B5B95; font-size: 1.5em; margin-bottom: 20px; }
</style>
"""
    )

    # Header with the project name and subtitle.
    gr.Markdown(
        """
<h1 class="header-title">🎨 Nostalgia Nexus</h1>
<h3 class="header-subtitle">Transform your childhood memories into stunning images with AI</h3>
"""
    )

    # Model preselected in the dropdown; also used to seed the status label.
    _default_model_key = "stabilityai/stable-diffusion-xl-base-1.0"

    def format_model_name(model_key):
        """Return the display name for a model key, or the key if unknown."""
        return IMAGE_MODELS.get(model_key, model_key)

    def _selected_model_html(model_key):
        """Return the highlighted "Selected model" HTML for a model key."""
        return f"<p style='color: #FF6F61; font-weight: bold;'>Selected model: {format_model_name(model_key)}</p>"

    with gr.Row():
        with gr.Column():
            # Core input: description
            description_input = gr.Textbox(
                label="Describe what you want to see",
                placeholder="Be detailed and specific about what you want in the image...",
                lines=4
            )
            with gr.Row():
                creation_type = gr.Dropdown(
                    choices=list(CREATION_TYPES.keys()),
                    value="Digital Art",
                    label="Creation Type"
                )
                model_selector = gr.Dropdown(
                    choices=list(IMAGE_MODELS.keys()),
                    value=_default_model_key,
                    label="Image Model"
                )
            with gr.Row():
                art_style = gr.Dropdown(
                    choices=list(ART_STYLES.keys()),
                    value="Photorealistic",
                    label="Art Style"
                )
                mood_dropdown = gr.Dropdown(
                    choices=["Happy", "Sad", "Mysterious", "Peaceful", "Tense",
                             "Whimsical", "Dark", "Energetic", "Romantic", "Epic"],
                    value="Peaceful",
                    label="Mood"
                )
            generate_button = gr.Button("✨ Generate Image", variant="primary", size="lg")

            # Status label. It starts out showing the preselected model so it
            # agrees with the dropdown before the first change event, and it
            # later receives the status text returned by generate_image.
            model_label = gr.HTML(value=_selected_model_html(_default_model_key))
            model_selector.change(
                fn=_selected_model_html,
                inputs=model_selector,
                outputs=model_label
            )
        with gr.Column():
            # Output areas: image and enhanced prompt
            image_output = gr.Image(label="Generated Image")
            with gr.Accordion("Enhanced Prompt", open=False):
                prompt_output = gr.Textbox(label="AI-Enhanced Prompt Used", lines=6)

    # Connect the generate button to the image generation function. The
    # second output (status text) is shown in the model label.
    generate_button.click(
        fn=generate_image,
        inputs=[description_input, creation_type, art_style, mood_dropdown, model_selector],
        outputs=[image_output, model_label, prompt_output]
    )

    # Tips section with enhanced styling
    with gr.Accordion("Tips for better results", open=True):
        gr.Markdown(
            """
<div style="background-color: #FFF3E0; padding: 10px; border-radius: 5px;">
<h4 style="color: #FF6F61;">💡 Tips for better results:</h4>
<ul>
<li><b>Be specific</b>: include details about subjects, actions, and settings.</li>
<li><b>Mention colors, textures, and lighting</b> to enhance your vision.</li>
<li><b>Try different art styles</b> to change the overall mood.</li>
<li><b>Select the mood</b> to influence the final atmosphere.</li>
<li><b>SDXL model</b> generally yields the highest quality images, albeit slower.</li>
</ul>
<p><i>Examples:</i></p>
<ul>
<li>"A serene lake at sunset with mountains in the background and a small wooden boat floating nearby"</li>
<li>"A futuristic cityscape with flying cars, neon lights, and skyscrapers under a dual-moon night sky"</li>
<li>"An elderly craftsman with weathered hands meticulously carving a detailed wooden sculpture"</li>
</ul>
</div>
"""
        )

    # Troubleshooting section with enhanced styling
    with gr.Accordion("Troubleshooting", open=False):
        gr.Markdown(
            """
<div style="background-color: #F8D7DA; padding: 10px; border-radius: 5px;">
<h4 style="color: #C82333;">Troubleshooting Tips</h4>
<ol>
<li>Check the console/terminal for detailed logs.</li>
<li>Verify your Hugging Face API key and its permissions.</li>
<li>Try a different model if access errors occur.</li>
<li>Simplify your prompt if it is overly long or complex.</li>
<li>Restart the app if it has been running for a long time.</li>
</ol>
<p>Common errors include 401/403 (authentication/model access issues), 429 (rate limiting), and 503 (service unavailability).</p>
</div>
"""
        )

# Launch the Gradio interface
interface.launch()
|