# Source: Hugging Face Spaces - Amarthya7/Multi-model-ai-demo (status: Sleeping)
# File size: 4,990 Bytes
# Revision (as shown on the page): ae1d6c7

import os
import tempfile

import gradio as gr
import matplotlib.pyplot as plt

from model_utils import (
    analyze_sentiment,
    answer_question,
    generate_caption,
    load_image_captioning_model,
    load_sentiment_model,
    load_vqa_model,
)

# Load every model once, at import time; the request handlers below read
# these module-level objects instead of loading anything themselves.
print("Loading models...")
(
    image_caption_model,
    image_caption_processor,
    image_caption_tokenizer,
) = load_image_captioning_model()
(vqa_model, vqa_processor, vqa_tokenizer) = load_vqa_model()
(sentiment_model, sentiment_tokenizer) = load_sentiment_model()
print("Models loaded successfully!")


def image_caption_fn(image):
    """Generate a caption for an uploaded image.

    ``generate_caption`` is called with a file path, so the image is first
    written to disk. The file lives in a temporary directory created for this
    call only, which keeps overlapping requests (including ones handled by
    ``vqa_fn``) from overwriting or deleting each other's file, and guarantees
    removal even when captioning raises.

    Args:
        image: Image object from the Gradio ``Image`` input (configured with
            ``type="pil"``), or ``None`` when nothing has been uploaded.

    Returns:
        The value returned by ``generate_caption``, or a short prompt asking
        for an image when ``image`` is ``None``.
    """
    if image is None:
        return "Please upload an image first."

    # JPEG cannot encode alpha or palette modes (e.g. RGBA, P); normalise so
    # the save below does not fail on such inputs.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # The directory and everything inside it is removed when the block exits,
    # whether normally or through an exception.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = os.path.join(temp_dir, "temp_image.jpg")
        image.save(temp_path)
        return generate_caption(
            temp_path,
            image_caption_model,
            image_caption_processor,
            image_caption_tokenizer,
        )


def vqa_fn(image, question):
    """Answer a question about an uploaded image.

    ``answer_question`` is called with a file path, so the image is first
    written to disk. The file lives in a temporary directory created for this
    call only, which keeps overlapping requests (including ones handled by
    ``image_caption_fn``) from overwriting or deleting each other's file, and
    guarantees removal even when answering raises.

    Args:
        image: Image object from the Gradio ``Image`` input (configured with
            ``type="pil"``), or ``None`` when nothing has been uploaded.
        question: Question text, passed through to ``answer_question``
            unchanged.

    Returns:
        The value returned by ``answer_question``, or a short prompt asking
        for an image when ``image`` is ``None``.
    """
    if image is None:
        return "Please upload an image first."

    # JPEG cannot encode alpha or palette modes (e.g. RGBA, P); normalise so
    # the save below does not fail on such inputs.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # The directory and everything inside it is removed when the block exits,
    # whether normally or through an exception.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = os.path.join(temp_dir, "temp_image.jpg")
        image.save(temp_path)
        return answer_question(
            temp_path, question, vqa_model, vqa_processor, vqa_tokenizer
        )


def sentiment_fn(text):
    """Classify the sentiment of ``text`` and chart the two class scores.

    Args:
        text: Text from the sentiment ``Textbox``. ``None``, empty, or
            whitespace-only input is rejected without calling the model.

    Returns:
        A ``(summary, figure)`` tuple: ``summary`` is the formatted result
        line and ``figure`` is a Matplotlib figure with one bar per class.
        For blank input the tuple is ``(prompt message, None)``.
    """
    if not text or not text.strip():
        return "Please enter some text to analyze.", None

    sentiment, confidence = analyze_sentiment(
        text, sentiment_model, sentiment_tokenizer
    )
    confidence_percentage = f"{confidence:.2%}"

    # Compare case-insensitively: the label is upper-cased for display below,
    # so its incoming case is not guaranteed here, and a "POSITIVE" label must
    # not end up on the negative bar.
    is_positive = sentiment.lower() == "positive"

    # Bar order is fixed as [Negative, Positive]; the predicted class gets
    # ``confidence`` and the other class gets the remainder.
    labels = ["Negative", "Positive"]
    values = (
        [1 - confidence, confidence] if is_positive else [confidence, 1 - confidence]
    )

    fig, ax = plt.subplots(figsize=(6, 3))
    try:
        bars = ax.bar(labels, values, color=["#FF6B6B", "#4ECDC4"])
        ax.set_ylim(0, 1)
        ax.set_title("Sentiment Analysis")

        # Print each bar's value just above it.
        for bar in bars:
            height = bar.get_height()
            ax.text(
                bar.get_x() + bar.get_width() / 2.0,
                height + 0.02,
                f"{height:.2f}",
                ha="center",
                va="bottom",
            )
    finally:
        # pyplot keeps a reference to every figure it creates; unregister this
        # one so repeated requests do not accumulate figures in memory. The
        # Figure object itself stays usable for rendering by the caller.
        plt.close(fig)

    return f"Sentiment: {sentiment.upper()} (Confidence: {confidence_percentage})", fig


# Create the Gradio interface.
# Components are placed by the `with` context they are created in, so the
# nesting and creation order below define the on-screen layout: one tab per
# task, each with an input column (left) and an output column (right).
with gr.Blocks(title="Multi-Modal AI Demo") as demo:
    gr.Markdown("# Multi-Modal AI Demo")
    gr.Markdown(
        "This application demonstrates multi-modal AI capabilities using Hugging Face models."
    )

    # Tab 1: image -> caption, handled by image_caption_fn.
    with gr.Tab("Image Captioning"):
        gr.Markdown("## Image Captioning")
        gr.Markdown("Upload an image to generate a descriptive caption.")

        with gr.Row():
            with gr.Column():
                # type="pil" makes the handler receive an image object
                # rather than a file path or array.
                image_input = gr.Image(type="pil", label="Input Image")
                caption_button = gr.Button("Generate Caption")

            with gr.Column():
                # Read-only: filled in by the click handler only.
                caption_output = gr.Textbox(
                    label="Generated Caption", interactive=False
                )

        # Run the captioning handler when the button is clicked.
        caption_button.click(
            fn=image_caption_fn, inputs=[image_input], outputs=[caption_output]
        )

    # Tab 2: image + question -> answer, handled by vqa_fn.
    with gr.Tab("Visual Question Answering"):
        gr.Markdown("## Visual Question Answering")
        gr.Markdown("Upload an image and ask a question about it.")

        with gr.Row():
            with gr.Column():
                vqa_image_input = gr.Image(type="pil", label="Input Image")
                vqa_question_input = gr.Textbox(label="Your Question")
                vqa_button = gr.Button("Get Answer")

            with gr.Column():
                vqa_output = gr.Textbox(label="Answer", interactive=False)

        # Inputs are passed to vqa_fn positionally in the listed order:
        # (image, question).
        vqa_button.click(
            fn=vqa_fn,
            inputs=[vqa_image_input, vqa_question_input],
            outputs=[vqa_output],
        )

    # Tab 3: text -> sentiment summary + bar chart, handled by sentiment_fn.
    with gr.Tab("Sentiment Analysis"):
        gr.Markdown("## Sentiment Analysis")
        gr.Markdown("Enter some text to analyze its sentiment.")

        with gr.Row():
            with gr.Column():
                sentiment_input = gr.Textbox(label="Input Text")
                sentiment_button = gr.Button("Analyze Sentiment")

            with gr.Column():
                sentiment_output = gr.Textbox(label="Result", interactive=False)
                sentiment_plot = gr.Plot(label="Sentiment Distribution")

        # sentiment_fn returns (summary string, figure); the two outputs are
        # filled in that order.
        sentiment_button.click(
            fn=sentiment_fn,
            inputs=[sentiment_input],
            outputs=[sentiment_output, sentiment_plot],
        )

    # Footer shown below the tabs, listing the model names.
    # NOTE(review): the model identifiers below are display text only; the
    # models actually loaded are chosen in model_utils - confirm they match.
    gr.Markdown("### About")
    gr.Markdown("""

    This demo uses the following pretrained models from Hugging Face:

    - Image Captioning: `nlpconnect/vit-gpt2-image-captioning`

    - Visual Question Answering: `nlpconnect/vit-gpt2-image-captioning` (simplified)

    - Sentiment Analysis: `distilbert-base-uncased-finetuned-sst-2-english`

    """)

# Launch the demo only when this file is run as a script, not when imported.
# NOTE(review): share=True asks Gradio for a public share link - confirm this
# is wanted for the deployment target.
if __name__ == "__main__":
    demo.launch(share=True)