# Storytelling Friend — Streamlit app: image -> caption -> story -> speech.
# Import necessary libraries | |
import streamlit as st # Streamlit for creating the web application | |
from transformers import pipeline # Pipeline for using Hugging Face models | |
from PIL import Image # PIL for image processing | |
# Function to load models
@st.cache_resource  # cache across Streamlit reruns so the heavy pipelines load only once
def load_models():
    """Load the captioning, story-generation, and text-to-speech pipelines.

    Returns:
        tuple: (caption_model, story_model, tts_model) Hugging Face pipelines.
    """
    # BLIP captioning model: image -> short text description.
    caption_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
    # GPT-2: continues a caption into a short story.
    story_model = pipeline("text-generation", model="gpt2")
    # Bark: synthesizes speech audio from text.
    tts_model = pipeline("text-to-speech", model="suno/bark")
    return caption_model, story_model, tts_model
# Function to generate story from caption
def generate_story(caption, story_model, max_length=100):
    """Generate a short story seeded by *caption*.

    Args:
        caption: Prompt text (typically an image caption).
        story_model: A Hugging Face text-generation pipeline (or compatible callable).
        max_length: Maximum token length of the generated text (default 100,
            matching the previous hard-coded value).

    Returns:
        str: The generated story text.
    """
    # num_return_sequences=1: only a single candidate story is needed.
    outputs = story_model(caption, max_length=max_length, num_return_sequences=1)
    return outputs[0]['generated_text']
# Function to convert text to audio
def text_to_audio(text, tts_model):
    """Synthesize speech for *text* with the given TTS pipeline.

    Args:
        text: The text to speak.
        tts_model: A Hugging Face text-to-speech pipeline (or compatible callable).

    Returns:
        The pipeline's output, passed through unchanged.
    """
    # Thin wrapper: delegate synthesis entirely to the pipeline.
    return tts_model(text)
# Function to process the uploaded image and generate a story
def process_image(image, caption_model, story_model):
    """Caption *image*, then expand the caption into a short story.

    Args:
        image: The uploaded image (PIL image).
        caption_model: Image-to-text pipeline producing [{'generated_text': ...}].
        story_model: Text-generation pipeline handed to generate_story.

    Returns:
        tuple: (caption, story), both strings.
    """
    # First model pass: describe the picture in one sentence.
    caption = caption_model(image)[0]['generated_text']
    # Second model pass: continue the caption into a short story.
    story = generate_story(caption, story_model)
    return caption, story
# Main part
def main():
    """Streamlit entry point: upload an image, show caption, story, and audio."""
    st.set_page_config(page_title="Storytelling Friend", page_icon="🦦")  # Browser-tab title/icon
    st.write("Upload an image to generate a story!")  # Instructions for the user
    # Upload image section
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    # Load models once per run (cheap on reruns if load_models is cached).
    caption_model, story_model, tts_model = load_models()
    if uploaded_file is not None:
        # Open and display the uploaded image.
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_container_width=True)
        # Caption the image and generate the story from the caption.
        caption, story = process_image(image, caption_model, story_model)
        st.subheader("Generated Caption:")
        st.write(caption)
        st.subheader("Generated Story:")
        st.write(story)
        # Convert the story to speech and play it.
        audio = text_to_audio(story, tts_model)
        # BUG FIX: the TTS pipeline returns {"audio": ndarray, "sampling_rate": int};
        # st.audio needs the raw waveform plus its sample rate, not the whole dict.
        st.audio(audio["audio"], sample_rate=audio["sampling_rate"])
# Launch the app only when executed as a script (not on import).
if __name__ == "__main__":
    main()