joey1101 committed on
Commit
b1da77e
·
verified ·
1 Parent(s): 2da2d23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -62
app.py CHANGED
@@ -1,65 +1,55 @@
1
- # Import necessary libraries
2
- import streamlit as st # Streamlit for creating the web application
3
- from transformers import pipeline # Pipeline for using Hugging Face models
4
- from PIL import Image # PIL for image processing
5
-
6
- # Function to load models
7
- def load_models():
8
- # Load the image to text model
9
- caption_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large") # Load pre-trained image to text model
 
 
 
 
 
10
  # Load the text generation model
11
- story_model = pipeline("text-generation", model="gpt2") # Load pre-trained text generation model
12
- # Load the text-to-speech model
13
- tts_model = pipeline("text-to-speech", model="suno/bark") # Load a TTS model
14
- return caption_model, story_model, tts_model # Return all three models
15
-
16
- # Function to generate story from caption
17
- def generate_story(caption, story_model):
18
- # Generate a story based on the caption
19
- story = story_model(caption, max_length=100, num_return_sequences=1)[0]['generated_text'] # Generate the story
20
- return story # Return the generated story
21
 
22
  # Function to convert text to audio
23
- def text_to_audio(text, tts_model):
24
- audio = tts_model(text) # Generate audio from text using the TTS model
25
- return audio # Return the audio object
26
-
27
- # Function to process the uploaded image and generate a story
28
- def process_image(image, caption_model, story_model):
29
- # Generate a caption from the uploaded image
30
- result = caption_model(image) # Get the result from the model
31
- caption = result[0]['generated_text'] # Access the generated caption
32
- # Generate a story from the caption
33
- story = generate_story(caption, story_model) # Call the story generation function
34
- return caption, story # Return both caption and story
35
-
36
- # Main part
37
- def main():
38
- st.set_page_config(page_title="Storytelling Friend", page_icon="🦦") # Title of the application
39
- st.write("Upload an image to generate a story!") # Instructions for the user
40
-
41
- # Upload image section
42
- uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"]) # File uploader for images
43
-
44
- # Load models once
45
- caption_model, story_model, tts_model = load_models() # Load models
46
-
47
- if uploaded_file is not None:
48
- # Open and read the uploaded image
49
- image = Image.open(uploaded_file) # Open the uploaded image file
50
- st.image(image, caption="Uploaded Image", use_container_width=True) # Display the uploaded image
51
-
52
- # Process the image and generate story
53
- caption, story = process_image(image, caption_model, story_model) # Get caption and story
54
- st.subheader("Generated Caption:") # Subheader for caption
55
- st.write(caption) # Display the caption
56
- st.subheader("Generated Story:") # Subheader for story
57
- st.write(story) # Display the generated story
58
-
59
- # Convert story to audio and play it
60
- audio = text_to_audio(story, tts_model) # Convert story to audio
61
- st.audio(audio, format='audio/wav') # Play the audio
62
-
63
- # Run the app
64
- if __name__ == "__main__":
65
- main() # Call the main function to run the app
 
1
+ import streamlit as st # Streamlit for building the web application
2
+ from transformers import pipeline # Hugging Face Transformers pipeline for models
3
+ from PIL import Image # PIL for handling image files
4
+
5
# Function to convert image to text
def img2text(image):
    """Generate a descriptive caption for an uploaded image.

    Args:
        image: A PIL.Image (anything accepted by the HF image-to-text
            pipeline).

    Returns:
        str: The caption produced by the BLIP captioning model.
    """
    # Cache the pipeline on the function object so the (large) model is
    # downloaded/loaded only once per process, not on every Streamlit rerun.
    model = getattr(img2text, "_model", None)
    if model is None:
        model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
        img2text._model = model
    # The pipeline returns a list of dicts; take the first generated caption.
    return model(image)[0]["generated_text"]
12
+
13
# Function to generate a story based on the caption
def text2story(text):
    """Generate a short story seeded by the given caption text.

    Args:
        text (str): Image caption used as the story prompt.

    Returns:
        str: The generated story (the returned text includes the
        "Once upon a time, ..." prompt prefix).
    """
    # Cache the GPT-2 pipeline so the model is loaded only once per process,
    # not on every Streamlit rerun.
    model = getattr(text2story, "_model", None)
    if model is None:
        model = pipeline("text-generation", model="gpt2")
        text2story._model = model
    # max_length counts prompt + generated tokens; one sequence is enough here.
    story_text = model(f"Once upon a time, {text}.", max_length=100, num_return_sequences=1)
    return story_text[0]["generated_text"]  # Return the generated story
 
 
 
 
 
 
20
 
21
# Function to convert text to audio
def text2audio(story_text):
    """Synthesize speech audio from the story text.

    Args:
        story_text (str): The text to be spoken.

    Returns:
        dict: The TTS pipeline output — per the pipeline call below it is
        indexed by the caller as ``audio_data['audio']`` and
        ``audio_data['sampling_rate']``.
    """
    # Cache the TTS pipeline so the model is loaded only once per process,
    # not on every Streamlit rerun.
    model = getattr(text2audio, "_model", None)
    if model is None:
        model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
        text2audio._model = model
    audio_data = model(story_text)  # Generate audio data from the story text
    return audio_data  # Return the audio data
28
+
29
# Main part of the application: upload an image, caption it, turn the
# caption into a story, and read the story aloud.
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")  # Set the title and icon of the app
st.header("Storytelling From Your Image")  # Header for the application
uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])  # File uploader for images

if uploaded_file is not None:
    # Open and display the uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Stage 1: Image to Text
    st.text('Processing image to text...')  # Inform the user about the processing stage
    scenario = img2text(image)  # Get the caption for the uploaded image
    st.write("Caption:", scenario)  # Display the generated caption

    # Stage 2: Text to Story
    st.text('Generating a story...')  # Inform the user about the story generation stage
    story = text2story(scenario)  # Generate a story based on the caption
    st.write("Story:", story)  # Display the generated story

    # Stage 3: Story to Audio data
    st.text('Generating audio data...')  # Inform the user about the audio generation stage
    audio_data = text2audio(story)  # Convert the generated story into audio

    # Render the audio player directly. The previous "Play Audio" button was
    # a bug: clicking any Streamlit button triggers a full script rerun, so
    # the caption, story and audio were all regenerated from scratch before
    # the audio widget could appear.
    st.audio(audio_data['audio'], format="audio/wav", start_time=0, sample_rate=audio_data['sampling_rate'])