# app.py -- from the "Assignment" Space, uploaded by joey1101.
# Revision 2da2d23 ("Update app.py", verified), 3.12 kB.
# Import necessary libraries
import streamlit as st # Streamlit for creating the web application
from transformers import pipeline # Pipeline for using Hugging Face models
from PIL import Image # PIL for image processing
# Function to load models
def load_models():
    """Build the three Hugging Face pipelines the app relies on.

    Returns:
        A ``(caption_model, story_model, tts_model)`` tuple: the
        image-to-text, text-generation and text-to-speech pipelines,
        in that order.
    """
    # (task, checkpoint) pairs, listed in the order they are built and returned.
    pipeline_specs = (
        ("image-to-text", "Salesforce/blip-image-captioning-large"),
        ("text-generation", "gpt2"),
        ("text-to-speech", "suno/bark"),
    )
    caption_model, story_model, tts_model = (
        pipeline(task, model=checkpoint) for task, checkpoint in pipeline_specs
    )
    return caption_model, story_model, tts_model
# Function to generate story from caption
def generate_story(caption, story_model):
    """Expand a caption into a story using the text-generation model.

    Args:
        caption: Prompt text the story should continue from.
        story_model: Callable text-generation pipeline. It is invoked with
            the caption plus generation options, and its result is indexed
            as ``result[0]['generated_text']``.

    Returns:
        The ``'generated_text'`` entry of the first returned sequence.
    """
    # Cap the output length at 100 and ask for a single candidate sequence.
    generation_options = {"max_length": 100, "num_return_sequences": 1}
    candidates = story_model(caption, **generation_options)
    first_candidate = candidates[0]
    return first_candidate['generated_text']
# Function to convert text to audio
def text_to_audio(text, tts_model):
    """Synthesize speech for *text* with the given text-to-speech model.

    Args:
        text: The text to be spoken.
        tts_model: Callable text-to-speech pipeline.

    Returns:
        Whatever ``tts_model(text)`` produces, passed through unchanged.
    """
    return tts_model(text)
# Function to process the uploaded image and generate a story
def process_image(image, caption_model, story_model):
    """Caption an image and turn that caption into a story.

    Args:
        image: The image handed to the captioning model.
        caption_model: Callable image-to-text pipeline; its result is
            indexed as ``result[0]['generated_text']``.
        story_model: Text-generation pipeline forwarded to
            ``generate_story``.

    Returns:
        A ``(caption, story)`` tuple.
    """
    # The first result of the captioning model carries the caption text.
    caption = caption_model(image)[0]['generated_text']
    return caption, generate_story(caption, story_model)
# Main part
def main():
    """Render the Storytelling Friend page.

    Flow: upload an image, caption it, expand the caption into a story,
    then read the story aloud. Nothing beyond the uploader is rendered
    until a file has been provided.
    """
    st.set_page_config(page_title="Storytelling Friend", page_icon="🦦")
    st.write("Upload an image to generate a story!")

    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    # Streamlit re-executes the whole script on every user interaction.
    # Wrapping the loader in st.cache_resource keeps the three pipelines
    # alive across reruns instead of rebuilding them each time.
    caption_model, story_model, tts_model = st.cache_resource(load_models)()

    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    caption, story = process_image(image, caption_model, story_model)
    st.subheader("Generated Caption:")
    st.write(caption)
    st.subheader("Generated Story:")
    st.write(story)

    audio = text_to_audio(story, tts_model)
    # The text-to-speech pipeline returns a dict holding the waveform and
    # its sampling rate. st.audio cannot play that dict directly; it needs
    # the raw samples, plus sample_rate when they are a NumPy array.
    st.audio(audio["audio"], format='audio/wav', sample_rate=audio["sampling_rate"])
# Run the app
# Launch the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()