# app.py
"""Streamlit front end for MyPod.

Turns one input source (PDF, web page, YouTube video or research topic) into
a two-speaker podcast, lets the user edit the transcript and re-synthesize
the audio, and offers a limited number of follow-up Q&A exchanges.
"""

import difflib
import os
import re
import tempfile
import time

import pypdf
import streamlit as st
from pydub import AudioSegment, effects

from prompts import SYSTEM_PROMPT
# NEW: For Q&A
from qa import transcribe_audio_deepgram, handle_qa_exchange
from utils import (
    generate_script,
    generate_audio_mp3,
    truncate_text,
    extract_text_from_url,
    transcribe_youtube_video,
    research_topic,
    mix_with_bg_music,
    DialogueItem
)

MAX_QA_QUESTIONS = 5  # up to 5 voice/text questions

# Overlap (in ms) used when joining consecutive spoken segments.
CROSSFADE_MS = 50

# Matches one transcript entry of the form "**Speaker**: text".
_SPEAKER_LINE_RE = re.compile(r"\*\*(.+?)\*\*:\s*(.+)")

# Session-state key that remembers the temp copy of the uploaded music file.
_BG_MUSIC_STATE_KEY = "_bg_music_cache"


def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
    """Convert a user-edited transcript back into a list of DialogueItem.

    Entries are recognised by the ``**Name**: text`` pattern. A name equal
    (case-insensitively) to ``host_name`` is given speaker "Jane", one equal
    to ``guest_name`` is given "John", and any other name falls back to
    "Jane". When no entry matches the pattern, the whole text becomes a single
    item attributed to the host.

    Args:
        edited_text: Transcript text as edited by the user.
        host_name: Display name of the host; falsy values mean "Jane".
        guest_name: Display name of the guest; falsy values mean "John".

    Returns:
        A list of DialogueItem; empty when ``edited_text`` has no content.
    """
    host_name = host_name or "Jane"
    guest_name = guest_name or "John"
    host_key = host_name.lower()
    guest_key = guest_name.lower()

    matches = _SPEAKER_LINE_RE.findall(edited_text)
    if not matches:
        text_line = edited_text.strip()
        if not text_line:
            # Nothing to speak: avoid creating an item with empty text.
            return []
        speaker = "John" if host_key == guest_key else "Jane"
        return [
            DialogueItem(speaker=speaker, display_speaker=host_name, text=text_line)
        ]

    items = []
    for raw_name, text_line in matches:
        name_key = raw_name.lower()
        # The host check wins when both names are identical.
        if name_key != host_key and name_key == guest_key:
            speaker = "John"
        else:
            speaker = "Jane"
        items.append(
            DialogueItem(speaker=speaker, display_speaker=raw_name, text=text_line)
        )
    return items


def _synthesize_dialogue_audio(dialogue_items, custom_bg_music_path=None):
    """Synthesize, join and mix the audio for a sequence of dialogue items.

    Each item is passed to ``generate_audio_mp3``; the resulting file is
    loaded and then deleted even if loading fails. Segments are appended with
    a short crossfade, passed through ``mix_with_bg_music`` and exported to
    MP3.

    Returns:
        ``(audio_bytes, transcript)``, or ``(None, "")`` when
        ``dialogue_items`` yields no segments.
    """
    segments = []
    transcript_parts = []
    for item in dialogue_items:
        audio_file = generate_audio_mp3(item.text, item.speaker)
        try:
            segments.append(AudioSegment.from_file(audio_file, format="mp3"))
        finally:
            # Always discard the per-line file, even when decoding raised.
            os.remove(audio_file)
        transcript_parts.append(f"**{item.display_speaker}**: {item.text}\n\n")

    if not segments:
        return None, ""

    combined_spoken = segments[0]
    for seg in segments[1:]:
        # pydub raises ValueError when the crossfade exceeds either clip.
        crossfade = min(CROSSFADE_MS, len(combined_spoken), len(seg))
        combined_spoken = combined_spoken.append(seg, crossfade=crossfade)

    final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)

    # Reserve a path, close the handle, then let pydub write to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        final_mp3_path = temp_audio.name
    try:
        final_mix.export(final_mp3_path, format="mp3")
        with open(final_mp3_path, "rb") as f:
            audio_bytes = f.read()
    finally:
        os.remove(final_mp3_path)

    return audio_bytes, "".join(transcript_parts)


def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
    """Re-create the podcast audio from already-parsed dialogue items.

    Args:
        dialogue_items: Iterable of items exposing ``text``, ``speaker`` and
            ``display_speaker``.
        custom_bg_music_path: Optional path forwarded to ``mix_with_bg_music``.

    Returns:
        ``(audio_bytes, transcript)`` on success, otherwise
        ``(None, error_message)``.
    """
    try:
        audio_bytes, transcript = _synthesize_dialogue_audio(
            dialogue_items, custom_bg_music_path
        )
    except Exception as e:
        # Report failures the same way generate_podcast does so the UI can
        # show a message instead of a traceback.
        return None, f"Error generating audio: {e}"
    if audio_bytes is None:
        return None, "No audio segments were generated."
    return audio_bytes, transcript


def _extract_source_text(file, url, video_url, research_topic_input):
    """Fetch the raw text for whichever single source was supplied.

    Returns:
        ``(text, None)`` on success or ``(None, error_message)`` on failure.
    """
    if file:
        try:
            if not file.name.lower().endswith('.pdf'):
                return None, "Please upload a PDF file."
            reader = pypdf.PdfReader(file)
            # Extract each page once; skip pages that yield no text.
            page_texts = (page.extract_text() for page in reader.pages)
            text = " ".join(t for t in page_texts if t)
        except Exception as e:
            return None, f"Error reading PDF: {e}"
        if not text.strip():
            return None, "Failed to extract text from PDF."
        return text, None

    if url:
        try:
            text = extract_text_from_url(url)
        except Exception as e:
            return None, f"Error extracting text from URL: {e}"
        if not text:
            return None, "Failed to extract text from URL."
        return text, None

    if video_url:
        try:
            text = transcribe_youtube_video(video_url)
        except Exception as e:
            return None, f"Error transcribing YouTube video: {e}"
        if not text:
            return None, "Failed to transcribe YouTube video."
        return text, None

    try:
        text = research_topic(research_topic_input)
    except Exception as e:
        return None, f"Error researching topic: {e}"
    if not text:
        return None, f"Sorry, no information found on '{research_topic_input}'."
    return text, None


def _build_full_prompt(host_name, host_desc, guest_name, guest_desc,
                       user_specs, sponsor_content):
    """Append the optional speaker, user and sponsor notes to SYSTEM_PROMPT."""
    extra_instructions = []
    if host_name or guest_name:
        host_line = f"Host: {host_name or 'Jane'} - {host_desc or 'a curious host'}."
        guest_line = f"Guest: {guest_name or 'John'} - {guest_desc or 'an expert'}."
        extra_instructions.append(f"{host_line}\n{guest_line}")
    if (user_specs or "").strip():
        extra_instructions.append(f"Additional User Instructions: {user_specs}")
    if (sponsor_content or "").strip():
        extra_instructions.append(
            f"Sponsor Content Provided (should be under ~30 seconds):\n{sponsor_content}"
        )

    combined_instructions = "\n\n".join(extra_instructions).strip()
    full_prompt = SYSTEM_PROMPT
    if combined_instructions:
        full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
    return full_prompt


def generate_podcast(
    file,
    url,
    video_url,
    research_topic_input,
    tone,
    length_minutes,
    host_name,
    host_desc,
    guest_name,
    guest_desc,
    user_specs,
    sponsor_content,
    sponsor_style,
    custom_bg_music_path
):
    """Generate a podcast (audio plus transcript) from exactly one source.

    Exactly one of ``file``, ``url``, ``video_url`` and
    ``research_topic_input`` must be provided. The source text is truncated
    with ``truncate_text``, turned into a script by ``generate_script`` and
    then synthesized line by line.

    Returns:
        ``(audio_bytes, transcript)`` on success, otherwise
        ``(None, error_message)``.
    """
    sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
    if sum(sources) > 1:
        return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
    if not any(sources):
        return None, "Please provide at least one source."

    text, error = _extract_source_text(file, url, video_url, research_topic_input)
    if error:
        return None, error
    text = truncate_text(text)

    full_prompt = _build_full_prompt(
        host_name, host_desc, guest_name, guest_desc, user_specs, sponsor_content
    )

    try:
        script = generate_script(
            full_prompt,
            text,
            tone,
            f"{length_minutes} Mins",
            host_name=host_name or "Jane",
            guest_name=guest_name or "John",
            sponsor_style=sponsor_style,
            sponsor_provided=bool((sponsor_content or "").strip())
        )
    except Exception as e:
        return None, f"Error generating script: {e}"

    try:
        audio_bytes, transcript = _synthesize_dialogue_audio(
            script.dialogue, custom_bg_music_path
        )
    except Exception as e:
        return None, f"Error generating audio: {e}"
    if audio_bytes is None:
        return None, "No audio segments generated."
    return audio_bytes, transcript


def highlight_differences(original: str, edited: str) -> str:
    """Return the edited text rebuilt word by word from a diff.

    Both inputs are split on whitespace and compared with
    ``difflib.SequenceMatcher``. Unchanged words are taken from ``original``,
    replaced or inserted words are taken from ``edited``, and deleted words
    are omitted. Words are joined with single spaces.
    """
    original_words = original.split()
    edited_words = edited.split()
    matcher = difflib.SequenceMatcher(None, original_words, edited_words)

    highlighted = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode == 'equal':
            highlighted.extend(original_words[i1:i2])
        elif opcode in ('replace', 'insert'):
            # NOTE(review): changed words are emitted without any markup, so
            # nothing is visually highlighted - confirm whether a wrapper
            # (e.g. an HTML span) is intended here.
            highlighted.extend(edited_words[j1:j2])
        # 'delete': words removed by the user are left out.
    return ' '.join(highlighted)


def _persist_bg_music(uploaded_file):
    """Return a temp-file path for the uploaded music, written once per upload.

    Streamlit re-runs the script on every interaction; the path is remembered
    in session state so each rerun does not create another temp file. A
    stale copy is deleted when the upload changes or is removed.
    """
    cached = st.session_state.get(_BG_MUSIC_STATE_KEY)
    signature = (uploaded_file.name, uploaded_file.size) if uploaded_file else None

    if cached:
        if cached["signature"] == signature and os.path.exists(cached["path"]):
            return cached["path"]
        if os.path.exists(cached["path"]):
            os.remove(cached["path"])
        st.session_state[_BG_MUSIC_STATE_KEY] = None

    if not uploaded_file:
        return None

    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        # getvalue() returns the full buffer regardless of read position.
        tmp.write(uploaded_file.getvalue())
        path = tmp.name
    st.session_state[_BG_MUSIC_STATE_KEY] = {"signature": signature, "path": path}
    return path


def _transcribe_audio_question(audio_q):
    """Write the recorded question to a temp WAV, transcribe it, then clean up."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(audio_q.getvalue())
        local_audio_path = tmp.name
    try:
        return transcribe_audio_deepgram(local_audio_path)
    finally:
        # Guarded in case the transcriber already removed the file.
        if os.path.exists(local_audio_path):
            os.remove(local_audio_path)


def main():
    """Render the MyPod page: inputs, generation, transcript editing and Q&A."""
    st.set_page_config(
        page_title="MyPod v2: AI-Powered Podcast Magic",
        layout="centered"
    )

    # Inject custom CSS for styling adjustments
    st.markdown(""" """, unsafe_allow_html=True)

    logo_col, title_col = st.columns([1, 10])
    with logo_col:
        st.image("logomypod.jpg", width=70)  # Increased size by ~15%
    with title_col:
        st.markdown("## MyPod v2: AI-Powered Podcast Magic")

    # Reinstated welcome section
    st.markdown(
        " Welcome to **MyPod**, your go-to AI-powered podcast generator! 🎉 "
        "MyPod transforms your documents, webpages, YouTube videos, or research "
        "topics into a more human-sounding, conversational podcast. "
        "Select a tone and a duration range. The output script will be on-topic, "
        "concise, and respect your chosen length. "
    )

    # "How to Use" as an expander with enumerated list and larger text
    with st.expander("How to Use"):
        st.markdown("""
  1. Provide one source: PDF Files, Website URL, YouTube videos, or a Topic to Research.
  2. Choose the tone and the target duration.
  3. Add custom names and descriptions for the speakers if you wish.
  4. Add sponsored content as a separate break or blended into the script.
  5. Click 'Generate Podcast' to produce your podcast.
  6. Post generation you can edit the transcript and re-generate the audio with your edits if needed.
  7. Ask Follow-up Questions via text or voice and get immediate answers.
""", unsafe_allow_html=True)

    # Retained text below "How to Use"
    st.markdown(
        " **Research a Topic:** If it's too niche or specific, you might not get "
        "the desired outcome. "
        "**Token Limit:** Up to ~2,048 tokens are supported. Long inputs may be "
        "truncated. "
        "**Note:** YouTube videos will only work if they have captions built in. "
        "⏳**Please be patient while your podcast is being generated.** "
        "This process involves content analysis, script creation, and "
        "high-quality audio synthesis, which may take a few minutes. "
        "🔥 **Ready to create your personalized podcast?** "
        "Give it a try now and let the magic happen! 🔥 "
    )

    # Original placement of input options
    col1, col2 = st.columns(2)
    with col1:
        file = st.file_uploader("Upload File (.pdf only)", type=["pdf"])
        url = st.text_input("Or Enter Website URL")
        video_url = st.text_input("Or Enter YouTube Link (Captioned videos)")
    with col2:
        research_topic_input = st.text_input("Or Research a Topic")
        tone = st.radio("Tone", ["Casual", "Formal", "Humorous", "Youthful"], index=0)
        length_minutes = st.slider("Podcast Length (in minutes)", 1, 60, 3)

    st.markdown("### Customize Your Podcast (New Features)")

    with st.expander("Set Host & Guest Names/Descriptions (Optional)"):
        host_name = st.text_input("Female Host Name (leave blank for 'Jane')")
        host_desc = st.text_input("Female Host Description (Optional)")
        guest_name = st.text_input("Male Guest Name (leave blank for 'John')")
        guest_desc = st.text_input("Male Guest Description (Optional)")

    user_specs = st.text_area(
        "Any special instructions or prompts for the script? (Optional)", ""
    )
    sponsor_content = st.text_area("Sponsored Content / Ad (Optional)", "")

    # Removed bold heading for Sponsor Integration Style
    sponsor_style = st.selectbox(
        "Sponsor Integration Style",
        ["Separate Break", "Blended"]
    )

    custom_bg_music_file = st.file_uploader(
        "Upload Custom Background Music (Optional)", type=["mp3", "wav"]
    )
    custom_bg_music_path = _persist_bg_music(custom_bg_music_file)

    # Seed session state on the first run only; later reruns keep their values.
    session_defaults = {
        "audio_bytes": None,
        "transcript": None,
        "transcript_original": None,
        # For Q&A
        "qa_count": 0,
        "conversation_history": "",
    }
    for key, default in session_defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default

    generate_button = st.button("Generate Podcast")

    if generate_button:
        progress_bar = st.progress(0)
        progress_text = st.empty()

        progress_messages = [
            "🔍 Analyzing your input...",
            "📝 Crafting the perfect script...",
            "🎙️ Generating high-quality audio...",
            "🎶 Adding the finishing touches..."
        ]

        # Staged status messages shown before the (blocking) generation call.
        for percent, message in zip((0, 25, 50, 75), progress_messages):
            progress_text.write(message)
            progress_bar.progress(percent)
            time.sleep(1.0)

        audio_bytes, transcript = generate_podcast(
            file, url, video_url, research_topic_input, tone, length_minutes,
            host_name, host_desc, guest_name, guest_desc,
            user_specs, sponsor_content, sponsor_style,
            custom_bg_music_path
        )

        progress_bar.progress(100)
        progress_text.write("✅ Done!")

        if audio_bytes is None:
            # On failure the second tuple element carries the error message.
            st.error(transcript)
            st.session_state["audio_bytes"] = None
            st.session_state["transcript"] = None
            st.session_state["transcript_original"] = None
        else:
            st.success("Podcast generated successfully!")
            st.session_state["audio_bytes"] = audio_bytes
            st.session_state["transcript"] = transcript
            st.session_state["transcript_original"] = transcript
            # Reset Q&A
            st.session_state["qa_count"] = 0
            st.session_state["conversation_history"] = ""

    if st.session_state["audio_bytes"]:
        st.audio(st.session_state["audio_bytes"], format='audio/mp3')
        st.download_button(
            label="Download Podcast (MP3)",
            data=st.session_state["audio_bytes"],
            file_name="my_podcast.mp3",
            mime="audio/mpeg"
        )

        st.markdown("### Generated Transcript (Editable)")
        edited_text = st.text_area(
            "Feel free to tweak lines, fix errors, or reword anything.",
            value=st.session_state["transcript"],
            height=300
        )

        if st.session_state["transcript_original"]:
            highlighted_transcript = highlight_differences(
                st.session_state["transcript_original"],
                edited_text
            )
            st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
            st.markdown(highlighted_transcript, unsafe_allow_html=True)

        if st.button("Regenerate Audio From Edited Text"):
            regen_bar = st.progress(0)
            regen_text = st.empty()

            regen_text.write("🔄 Regenerating your podcast with the edits...")
            regen_bar.progress(25)
            time.sleep(1.0)

            regen_text.write("🔧 Adjusting the script based on your changes...")
            regen_bar.progress(50)
            time.sleep(1.0)

            dialogue_items = parse_user_edited_transcript(
                edited_text,
                host_name or "Jane",
                guest_name or "John"
            )
            new_audio_bytes, new_transcript = regenerate_audio_from_dialogue(
                dialogue_items, custom_bg_music_path
            )

            regen_bar.progress(75)
            time.sleep(1.0)
            regen_bar.progress(100)

            if new_audio_bytes is None:
                st.error(new_transcript)
            else:
                regen_text.write("✅ Regeneration complete!")
                st.success("Regenerated audio below:")

                st.session_state["audio_bytes"] = new_audio_bytes
                st.session_state["transcript"] = new_transcript
                st.session_state["transcript_original"] = new_transcript

                st.audio(new_audio_bytes, format='audio/mp3')
                st.download_button(
                    label="Download Edited Podcast (MP3)",
                    data=new_audio_bytes,
                    file_name="my_podcast_edited.mp3",
                    mime="audio/mpeg"
                )
                st.markdown("### Updated Transcript")
                st.markdown(new_transcript)

        # -----------------------
        # POST-PODCAST Q&A Logic
        # -----------------------
        st.markdown("## Post-Podcast Q&A")

        used_questions = st.session_state["qa_count"]
        remaining = MAX_QA_QUESTIONS - used_questions

        if remaining > 0:
            st.write(f"You can ask up to {remaining} more question(s).")

            typed_q = st.text_input("Type your follow-up question:")
            audio_q = st.audio_input("Or record an audio question (WAV)")

            if st.button("Submit Q&A"):
                question_text = typed_q.strip()
                if audio_q is not None:
                    st.write("Transcribing your audio question...")
                    audio_transcript = _transcribe_audio_question(audio_q)
                    # A successful transcription overrides the typed text.
                    if audio_transcript:
                        question_text = audio_transcript

                if not question_text:
                    st.warning("No question found (text or audio).")
                else:
                    st.write("Generating an answer...")
                    ans_audio, ans_text = handle_qa_exchange(question_text)
                    if ans_audio:
                        st.audio(ans_audio, format="audio/mp3")
                        st.markdown(f"**John**: {ans_text}")
                        # Only answered questions count against the limit.
                        st.session_state["qa_count"] += 1
                    else:
                        st.warning("No response could be generated.")
        else:
            st.write(f"You have used all {MAX_QA_QUESTIONS} Q&A opportunities.")

    # Footer with updated text
    st.markdown("", unsafe_allow_html=True)


if __name__ == "__main__":
    main()