import os
import tempfile

import gradio as gr
import matplotlib

# Use a non-interactive backend since figures are rendered server-side
matplotlib.use("Agg")
import matplotlib.pyplot as plt

from model_utils import (
    analyze_sentiment,
    answer_question,
    generate_caption,
    load_image_captioning_model,
    load_sentiment_model,
    load_vqa_model,
)
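
# model_utils is a local helper module (not shown here). Based on how it is
# called below, its assumed interface is roughly:
#   load_image_captioning_model() -> (model, processor, tokenizer)
#   load_vqa_model()              -> (model, processor, tokenizer)
#   load_sentiment_model()        -> (model, tokenizer)
#   generate_caption(image_path, model, processor, tokenizer) -> str
#   answer_question(image_path, question, model, processor, tokenizer) -> str
#   analyze_sentiment(text, model, tokenizer) -> (label, confidence)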

# Load models at startup
print("Loading models...")
image_caption_model, image_caption_processor, image_caption_tokenizer = (
    load_image_captioning_model()
)
vqa_model, vqa_processor, vqa_tokenizer = load_vqa_model()
sentiment_model, sentiment_tokenizer = load_sentiment_model()
print("Models loaded successfully!")

def image_caption_fn(image):
    """Generate a descriptive caption for the uploaded PIL image."""
    if image is None:
        return "Please upload an image first."
    # Save the image to a unique temp file so concurrent requests don't collide
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
        temp_path = tmp.name
    image.save(temp_path)
    try:
        caption = generate_caption(
            temp_path,
            image_caption_model,
            image_caption_processor,
            image_caption_tokenizer,
        )
    finally:
        # Clean up even if caption generation fails
        if os.path.exists(temp_path):
            os.remove(temp_path)
    return caption

def vqa_fn(image, question):
    """Answer a free-form question about the uploaded PIL image."""
    if image is None:
        return "Please upload an image first."
    if not question or not question.strip():
        return "Please enter a question."
    # Save the image to a unique temp file so concurrent requests don't collide
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
        temp_path = tmp.name
    image.save(temp_path)
    try:
        answer = answer_question(
            temp_path, question, vqa_model, vqa_processor, vqa_tokenizer
        )
    finally:
        # Clean up even if answering fails
        if os.path.exists(temp_path):
            os.remove(temp_path)
    return answer

def sentiment_fn(text):
    """Classify the sentiment of the input text and plot the class probabilities."""
    if not text or not text.strip():
        return "Please enter some text to analyze.", None
    sentiment, confidence = analyze_sentiment(
        text, sentiment_model, sentiment_tokenizer
    )
    confidence_percentage = f"{confidence:.2%}"

    # Simple bar chart of the two class probabilities
    labels = ["Negative", "Positive"]
    values = (
        [1 - confidence, confidence]
        if sentiment == "positive"
        else [confidence, 1 - confidence]
    )
    fig, ax = plt.subplots(figsize=(6, 3))
    bars = ax.bar(labels, values, color=["#FF6B6B", "#4ECDC4"])
    ax.set_ylim(0, 1)
    ax.set_title("Sentiment Analysis")
    # Annotate each bar with its probability
    for bar in bars:
        height = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2.0,
            height + 0.02,
            f"{height:.2f}",
            ha="center",
            va="bottom",
        )
    return f"Sentiment: {sentiment.upper()} (Confidence: {confidence_percentage})", fig

# Create the Gradio interface
with gr.Blocks(title="Multi-Modal AI Demo") as demo:
    gr.Markdown("# Multi-Modal AI Demo")
    gr.Markdown(
        "This application demonstrates multi-modal AI capabilities using Hugging Face models."
    )

    with gr.Tab("Image Captioning"):
        gr.Markdown("## Image Captioning")
        gr.Markdown("Upload an image to generate a descriptive caption.")
        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Input Image")
                caption_button = gr.Button("Generate Caption")
            with gr.Column():
                caption_output = gr.Textbox(
                    label="Generated Caption", interactive=False
                )
        caption_button.click(
            fn=image_caption_fn, inputs=[image_input], outputs=[caption_output]
        )

    with gr.Tab("Visual Question Answering"):
        gr.Markdown("## Visual Question Answering")
        gr.Markdown("Upload an image and ask a question about it.")
        with gr.Row():
            with gr.Column():
                vqa_image_input = gr.Image(type="pil", label="Input Image")
                vqa_question_input = gr.Textbox(label="Your Question")
                vqa_button = gr.Button("Get Answer")
            with gr.Column():
                vqa_output = gr.Textbox(label="Answer", interactive=False)
        vqa_button.click(
            fn=vqa_fn,
            inputs=[vqa_image_input, vqa_question_input],
            outputs=[vqa_output],
        )

    with gr.Tab("Sentiment Analysis"):
        gr.Markdown("## Sentiment Analysis")
        gr.Markdown("Enter some text to analyze its sentiment.")
        with gr.Row():
            with gr.Column():
                sentiment_input = gr.Textbox(label="Input Text")
                sentiment_button = gr.Button("Analyze Sentiment")
            with gr.Column():
                sentiment_output = gr.Textbox(label="Result", interactive=False)
                sentiment_plot = gr.Plot(label="Sentiment Distribution")
        sentiment_button.click(
            fn=sentiment_fn,
            inputs=[sentiment_input],
            outputs=[sentiment_output, sentiment_plot],
        )
gr.Markdown("### About") | |
gr.Markdown(""" | |
This demo uses the following pretrained models from Hugging Face: | |
- Image Captioning: `nlpconnect/vit-gpt2-image-captioning` | |
- Visual Question Answering: `nlpconnect/vit-gpt2-image-captioning` (simplified) | |
- Sentiment Analysis: `distilbert-base-uncased-finetuned-sst-2-english` | |
""") | |

# Launch the demo
if __name__ == "__main__":
    # share=True is ignored on Hugging Face Spaces, so launch with defaults
    demo.launch()
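
# To run locally: `python app.py` (assumes a requirements.txt providing
# gradio, transformers, torch, matplotlib, and pillow alongside model_utils.py).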