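"""Gradio app for a multi-modal AI demo.

Provides three tabs, each backed by a pretrained Hugging Face model (listed in
the About section of the interface): image captioning, visual question
answering, and sentiment analysis with a confidence bar chart.
"""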
import os
import gradio as gr
import matplotlib.pyplot as plt
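# model_utils is the project's local helper module; it provides the model
# loaders and the caption/VQA/sentiment inference helpers used below.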
from model_utils import (
    analyze_sentiment,
    answer_question,
    generate_caption,
    load_image_captioning_model,
    load_sentiment_model,
    load_vqa_model,
)

# Load models at startup
print("Loading models...")
image_caption_model, image_caption_processor, image_caption_tokenizer = (
    load_image_captioning_model()
)
vqa_model, vqa_processor, vqa_tokenizer = load_vqa_model()
sentiment_model, sentiment_tokenizer = load_sentiment_model()
print("Models loaded successfully!")
def image_caption_fn(image):
    """Generate a caption for the uploaded PIL image."""
    if image is None:
        return "Please upload an image first."
    # Convert to RGB so the image can be written as JPEG regardless of input mode
    image = image.convert("RGB")
    # Save image temporarily so the captioning helper can read it from disk
    temp_path = "temp_image.jpg"
    image.save(temp_path)
    try:
        # Generate caption
        caption = generate_caption(
            temp_path, image_caption_model, image_caption_processor, image_caption_tokenizer
        )
    finally:
        # Clean up the temporary file even if captioning fails
        if os.path.exists(temp_path):
            os.remove(temp_path)
    return caption


def vqa_fn(image, question):
    """Answer a free-form question about the uploaded PIL image."""
    if image is None or not question:
        return "Please upload an image and enter a question."
    # Convert to RGB so the image can be written as JPEG regardless of input mode
    image = image.convert("RGB")
    # Save image temporarily
    temp_path = "temp_image.jpg"
    image.save(temp_path)
    try:
        # Answer question
        answer = answer_question(
            temp_path, question, vqa_model, vqa_processor, vqa_tokenizer
        )
    finally:
        # Clean up the temporary file even if inference fails
        if os.path.exists(temp_path):
            os.remove(temp_path)
    return answer


def sentiment_fn(text):
    """Classify the sentiment of the text and return a summary string plus a bar chart."""
    if not text:
        return "Please enter some text to analyze.", None
    sentiment, confidence = analyze_sentiment(
        text, sentiment_model, sentiment_tokenizer
    )
    confidence_percentage = f"{confidence:.2%}"
    # Create a simple bar chart for visualization
    labels = ["Negative", "Positive"]
    values = (
        [1 - confidence, confidence]
        if sentiment == "positive"
        else [confidence, 1 - confidence]
    )
    fig, ax = plt.subplots(figsize=(6, 3))
    bars = ax.bar(labels, values, color=["#FF6B6B", "#4ECDC4"])
    # Leave a little headroom so the value labels are not clipped at 1.0
    ax.set_ylim(0, 1.1)
    ax.set_title("Sentiment Analysis")
    # Annotate each bar with its probability
    for bar in bars:
        height = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2.0,
            height + 0.02,
            f"{height:.2f}",
            ha="center",
            va="bottom",
        )
    return f"Sentiment: {sentiment.upper()} (Confidence: {confidence_percentage})", fig


# Create the Gradio interface
with gr.Blocks(title="Multi-Modal AI Demo") as demo:
    gr.Markdown("# Multi-Modal AI Demo")
    gr.Markdown(
        "This application demonstrates multi-modal AI capabilities using Hugging Face models."
    )

    with gr.Tab("Image Captioning"):
        gr.Markdown("## Image Captioning")
        gr.Markdown("Upload an image to generate a descriptive caption.")
        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Input Image")
                caption_button = gr.Button("Generate Caption")
            with gr.Column():
                caption_output = gr.Textbox(
                    label="Generated Caption", interactive=False
                )
        caption_button.click(
            fn=image_caption_fn, inputs=[image_input], outputs=[caption_output]
        )

    with gr.Tab("Visual Question Answering"):
        gr.Markdown("## Visual Question Answering")
        gr.Markdown("Upload an image and ask a question about it.")
        with gr.Row():
            with gr.Column():
                vqa_image_input = gr.Image(type="pil", label="Input Image")
                vqa_question_input = gr.Textbox(label="Your Question")
                vqa_button = gr.Button("Get Answer")
            with gr.Column():
                vqa_output = gr.Textbox(label="Answer", interactive=False)
        vqa_button.click(
            fn=vqa_fn,
            inputs=[vqa_image_input, vqa_question_input],
            outputs=[vqa_output],
        )

    with gr.Tab("Sentiment Analysis"):
        gr.Markdown("## Sentiment Analysis")
        gr.Markdown("Enter some text to analyze its sentiment.")
        with gr.Row():
            with gr.Column():
                sentiment_input = gr.Textbox(label="Input Text")
                sentiment_button = gr.Button("Analyze Sentiment")
            with gr.Column():
                sentiment_output = gr.Textbox(label="Result", interactive=False)
                sentiment_plot = gr.Plot(label="Sentiment Distribution")
        sentiment_button.click(
            fn=sentiment_fn,
            inputs=[sentiment_input],
            outputs=[sentiment_output, sentiment_plot],
        )

    gr.Markdown("### About")
    gr.Markdown("""
This demo uses the following pretrained models from Hugging Face:
- Image Captioning: `nlpconnect/vit-gpt2-image-captioning`
- Visual Question Answering: `nlpconnect/vit-gpt2-image-captioning` (simplified)
- Sentiment Analysis: `distilbert-base-uncased-finetuned-sst-2-english`
""")

# Launch the demo
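# Note: share=True asks Gradio for a temporary public gradio.live link, which
# mainly matters when running locally; a Hugging Face Space is already served
# at its own public URL.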
if __name__ == "__main__":
    demo.launch(share=True)