Spaces:

eskayML
/

Research-Paper-to-Audio

Runtime error

App Files Files Community

Research-Paper-to-Audio / app.py

eskayML

Upload 3 files

a2ec50f verified over 1 year ago

raw

history blame contribute delete

3.57 kB

	import streamlit as st
	import pyttsx3
	import tempfile
	import PyPDF2
	from huggingface_hub import InferenceClient

	page_bg_img = """
	<style>
	.stApp {
	background: linear-gradient( #eee 38%, #ccc 68%);
	}
	</style>
	"""

	st.markdown(page_bg_img, unsafe_allow_html=True)

	st.title("Summarize & Listen to your Academic Materials on the Fly.")

	uploaded_pdf = st.file_uploader("Upload a research Paper", type="pdf")
	full_text = None
	MODEL_NAME = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"
	client = InferenceClient(MODEL_NAME)


	DETAILED_SUMMARIZATION_PROMPT = """
	<INST>You are a very powerful summarization engine for summarizing academic contents,
	now you are to summarize the following text you are going to be provided which is from a document, make sure to understand
	all improperly parsed text and actually parse them properly , also make sure that your final summarization is very coherent and understandable by a student and is under 4000 words ,
	also the length of the summarized text should be less than the original provided text,
	if you are provided with a text that includes unnecessary items that do not contribute value to the book like preface about the author, do not include them in the summarization

	Your summary should be concise and should accurately and objectively communicate the key points of the paper.
	You should not include any personal opinions or interpretations in your summary but rather focus on
	objectively presenting the information from the paper. Your summary should be written in your own words
	and should not include any direct quotes from the paper. Please ensure that your summary is clear,
	concise, and accurately reflects the content of the original paper.
	do not go out of context of the words provided.
	Now here is your provided text :
	</INST>
	"""


	with st.spinner("Extracting Text..."):
	if uploaded_pdf is not None:
	tfile = tempfile.NamedTemporaryFile(delete=False)
	tfile.write(uploaded_pdf.read())
	with open(tfile.name, "rb") as pdf_file:
	pdf_reader = PyPDF2.PdfReader(pdf_file)
	num_pages = len(pdf_reader.pages)

	# Get text from all pages
	full_text = ""
	for page_num in range(num_pages):
	page = pdf_reader.pages[page_num]
	page_text = page.extract_text()
	full_text += page_text

	# truncating the full text at 25k characters
	full_text = full_text if len(full_text) < 100000 else full_text[:100000]
	# print(full_text)
	st.success("Text Extracted Successfully!!!")


	###################################################################################


	def synthesize_text_to_audio(text):
	engine = pyttsx3.init()
	with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
	temp_file_path = temp_file.name
	engine.save_to_file(text, temp_file_path) # Save the audio to a temporary file

	engine.runAndWait()
	sound_file = open(temp_file_path, "rb") # Open the saved audio file for reading
	return sound_file



	summarized_text = None
	if full_text:
	with st.spinner("Summarizing Text Content..."):
	summarized_text = client.text_generation(
	DETAILED_SUMMARIZATION_PROMPT + full_text,
	max_new_tokens=4096,
	temperature=0.2,
	top_p=0.8,
	)
	print(summarized_text)

	if summarized_text:
	with st.spinner('Synthesizing to Audio...'):
	st.audio(synthesize_text_to_audio(summarized_text))