joey1101 committed on
Commit
d637bdb
·
verified ·
1 Parent(s): 723f5b2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -0
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that captions an uploaded image and reads the caption aloud."""

import streamlit as st
from transformers import pipeline
from PIL import Image


@st.cache_resource
def load_pipelines():
    """Build the captioning and text-to-speech pipelines once and reuse them.

    Streamlit re-executes the whole script on every user interaction, so
    constructing the pipelines at module level would re-instantiate both
    models on each rerun. Caching them as a resource keeps a single shared
    instance.

    Returns:
        A ``(image_to_text, text_to_speech)`` tuple of transformers pipelines.
    """
    captioner = pipeline(
        "image-to-text",
        model="nlpconnect/vit-gpt2-image-captioning",
    )
    speaker = pipeline("text-to-speech", model="facebook/mms-tts-eng")
    return captioner, speaker


# Load pipelines (cached across reruns)
image_to_text, text_to_speech = load_pipelines()

st.title("Image-to-Text and Text-to-Speech App")

# Image uploader
uploaded_image = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_image:
    image = Image.open(uploaded_image)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Convert image to text: the pipeline returns a list of results and the
    # caption of the first one is used.
    text_output = image_to_text(image)[0]["generated_text"]
    st.write("### Extracted Text:")
    st.write(text_output)

    # Convert text to speech: the result carries the waveform under "audio"
    # and its rate under "sampling_rate".
    speech_output = text_to_speech(text_output)
    st.write("### Listen to Speech Output:")
    # NOTE(review): "audio" is presumably a NumPy waveform, in which case
    # st.audio needs sample_rate to encode it - confirm against the docs.
    st.audio(
        speech_output["audio"],
        format="audio/wav",
        start_time=0,
        sample_rate=speech_output["sampling_rate"],
    )