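"""Gradio app for a multi-modal AI demo.

Provides three tabs, each backed by a pretrained Hugging Face model (listed in
the About section of the interface): image captioning, visual question
answering, and sentiment analysis with a confidence bar chart.
"""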
import os
import gradio as gr
import matplotlib.pyplot as plt
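# model_utils is the project's local helper module; it provides the model
# loaders and the caption/VQA/sentiment inference helpers used below.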
from model_utils import (
    analyze_sentiment,
    answer_question,
    generate_caption,
    load_image_captioning_model,
    load_sentiment_model,
    load_vqa_model,
)

# Load models at startup
print("Loading models...")
image_caption_model, image_caption_processor, image_caption_tokenizer = (
    load_image_captioning_model()
)
vqa_model, vqa_processor, vqa_tokenizer = load_vqa_model()
sentiment_model, sentiment_tokenizer = load_sentiment_model()
print("Models loaded successfully!")
def image_caption_fn(image):
    """Generate a caption for the uploaded PIL image."""
    if image is None:
        return "Please upload an image first."
    # Convert to RGB so the image can be written as JPEG regardless of input mode
    image = image.convert("RGB")
    # Save image temporarily so the captioning helper can read it from disk
    temp_path = "temp_image.jpg"
    image.save(temp_path)
    try:
        # Generate caption
        caption = generate_caption(
            temp_path, image_caption_model, image_caption_processor, image_caption_tokenizer
        )
    finally:
        # Clean up the temporary file even if captioning fails
        if os.path.exists(temp_path):
            os.remove(temp_path)
    return caption


def vqa_fn(image, question):
    """Answer a free-form question about the uploaded PIL image."""
    if image is None or not question:
        return "Please upload an image and enter a question."
    # Convert to RGB so the image can be written as JPEG regardless of input mode
    image = image.convert("RGB")
    # Save image temporarily
    temp_path = "temp_image.jpg"
    image.save(temp_path)
    try:
        # Answer question
        answer = answer_question(
            temp_path, question, vqa_model, vqa_processor, vqa_tokenizer
        )
    finally:
        # Clean up the temporary file even if inference fails
        if os.path.exists(temp_path):
            os.remove(temp_path)
    return answer


def sentiment_fn(text):
    """Classify the sentiment of the text and return a summary string plus a bar chart."""
    if not text:
        return "Please enter some text to analyze.", None
    sentiment, confidence = analyze_sentiment(
        text, sentiment_model, sentiment_tokenizer
    )
    confidence_percentage = f"{confidence:.2%}"
    # Create a simple bar chart for visualization
    labels = ["Negative", "Positive"]
    values = (
        [1 - confidence, confidence]
        if sentiment == "positive"
        else [confidence, 1 - confidence]
    )
    fig, ax = plt.subplots(figsize=(6, 3))
    bars = ax.bar(labels, values, color=["#FF6B6B", "#4ECDC4"])
    # Leave a little headroom so the value labels are not clipped at 1.0
    ax.set_ylim(0, 1.1)
    ax.set_title("Sentiment Analysis")
    # Annotate each bar with its probability
    for bar in bars:
        height = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2.0,
            height + 0.02,
            f"{height:.2f}",
            ha="center",
            va="bottom",
        )
    return f"Sentiment: {sentiment.upper()} (Confidence: {confidence_percentage})", fig


# Create the Gradio interface
with gr.Blocks(title="Multi-Modal AI Demo") as demo:
    gr.Markdown("# Multi-Modal AI Demo")
    gr.Markdown(
        "This application demonstrates multi-modal AI capabilities using Hugging Face models."
    )

    with gr.Tab("Image Captioning"):
        gr.Markdown("## Image Captioning")
        gr.Markdown("Upload an image to generate a descriptive caption.")
        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Input Image")
                caption_button = gr.Button("Generate Caption")
            with gr.Column():
                caption_output = gr.Textbox(
                    label="Generated Caption", interactive=False
                )
        caption_button.click(
            fn=image_caption_fn, inputs=[image_input], outputs=[caption_output]
        )

    with gr.Tab("Visual Question Answering"):
        gr.Markdown("## Visual Question Answering")
        gr.Markdown("Upload an image and ask a question about it.")
        with gr.Row():
            with gr.Column():
                vqa_image_input = gr.Image(type="pil", label="Input Image")
                vqa_question_input = gr.Textbox(label="Your Question")
                vqa_button = gr.Button("Get Answer")
            with gr.Column():
                vqa_output = gr.Textbox(label="Answer", interactive=False)
        vqa_button.click(
            fn=vqa_fn,
            inputs=[vqa_image_input, vqa_question_input],
            outputs=[vqa_output],
        )

    with gr.Tab("Sentiment Analysis"):
        gr.Markdown("## Sentiment Analysis")
        gr.Markdown("Enter some text to analyze its sentiment.")
        with gr.Row():
            with gr.Column():
                sentiment_input = gr.Textbox(label="Input Text")
                sentiment_button = gr.Button("Analyze Sentiment")
            with gr.Column():
                sentiment_output = gr.Textbox(label="Result", interactive=False)
                sentiment_plot = gr.Plot(label="Sentiment Distribution")
        sentiment_button.click(
            fn=sentiment_fn,
            inputs=[sentiment_input],
            outputs=[sentiment_output, sentiment_plot],
        )

    gr.Markdown("### About")
    gr.Markdown("""
This demo uses the following pretrained models from Hugging Face:
- Image Captioning: `nlpconnect/vit-gpt2-image-captioning`
- Visual Question Answering: `nlpconnect/vit-gpt2-image-captioning` (simplified)
- Sentiment Analysis: `distilbert-base-uncased-finetuned-sst-2-english`
""")

# Launch the demo
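# Note: share=True asks Gradio for a temporary public gradio.live link, which
# mainly matters when running locally; a Hugging Face Space is already served
# at its own public URL.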
if __name__ == "__main__":
    demo.launch(share=True)