MyPod_10

Running

App Files Community

siddhartharyaai committed on Jan 13

Commit

cf2b563

verified ·

1 Parent(s): 84a3c5a

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -126

app.py CHANGED Viewed

@@ -16,41 +16,77 @@ from utils import (
     extract_text_from_url,
     transcribe_youtube_video,
     research_topic,
-    mix_with_bg_music  # We also import the updated function
 )
 from prompts import SYSTEM_PROMPT
-def parse_user_edited_transcript(edited_text: str):
     """
     Looks for lines like:
-        **Jane**: Hello
-        **John**: Sure, I'd love to talk about that.
-    Returns a list of (speaker, text).
     """
-    pattern = r"\*\*(Jane|John)\*\*:\s*(.+)"
     matches = re.findall(pattern, edited_text)
-    if not matches:
-        return [("Jane", edited_text)]
-    return matches
 def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
     """
-    Re-generates multi-speaker audio from user-edited text,
-    then mixes with background music in the root folder (bg_music.mp3)
-    or a user-provided music file.
-    Returns final audio bytes and updated transcript.
     """
     audio_segments = []
     transcript = ""
     crossfade_duration = 50  # in ms
-    for speaker, line_text in dialogue_items:
-        audio_file = generate_audio_mp3(line_text, speaker)
         seg = AudioSegment.from_file(audio_file, format="mp3")
         audio_segments.append(seg)
-        transcript += f"**{speaker}**: {line_text}\n\n"
         os.remove(audio_file)
     if not audio_segments:
@@ -61,10 +97,8 @@ def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
     for seg in audio_segments[1:]:
         combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
-    # Mix with background music or custom user music
     final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)
-    # Export to bytes
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         final_mix.export(temp_audio.name, format="mp3")
         final_mp3_path = temp_audio.name
@@ -75,7 +109,6 @@ def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
     return audio_bytes, transcript
 def generate_podcast(
     file,
     url,
@@ -93,10 +126,11 @@ def generate_podcast(
 ):
     """
     Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
-    Returns (audio_bytes, transcript_str), mixing with background music or user-provided music.
-    """
-    # Ensure only one input source
     sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
     if sum(sources) > 1:
         return None, "Provide only one input (PDF, URL, YouTube, or Research topic)."
@@ -108,7 +142,6 @@ def generate_podcast(
         try:
             if not file.name.lower().endswith('.pdf'):
                 return None, "Please upload a PDF file."
-            # Use the file-like object directly to read the PDF
             reader = pypdf.PdfReader(file)
             text = " ".join(page.extract_text() for page in reader.pages if page.extract_text())
         except Exception as e:
@@ -138,41 +171,38 @@ def generate_podcast(
     # Truncate if needed
     text = truncate_text(text)
-    # Incorporate user-specified host/guest details, user specs, sponsor content into the system prompt
-    # We'll compile an "extra_instructions" string that we feed to generate_script
     extra_instructions = []
-    # (1) Host/Guest details
     if host_name or guest_name:
-        host_str = f"Host: {host_name or 'Jane'} - {host_desc or 'a curious podcast host'}."
-        guest_str = f"Guest: {guest_name or 'John'} - {guest_desc or 'an expert in the subject matter'}."
-        extra_instructions.append(f"{host_str}\n{guest_str}")
-    # (2) User custom specs
     if user_specs.strip():
         extra_instructions.append(f"Additional User Instructions: {user_specs}")
-    # (3) Sponsor content
     if sponsor_content.strip():
         extra_instructions.append(
             "Please include a short sponsored advertisement. The sponsor text is as follows:\n"
             + sponsor_content
         )
-    # Combine all extra instructions
     combined_instructions = "\n\n".join(extra_instructions).strip()
-    # Construct a new system prompt:
-    # We'll keep the existing SYSTEM_PROMPT, but add the combined_instructions at the end.
-    # The 'generate_script' function will handle injecting tone and length logic.
     full_prompt = SYSTEM_PROMPT
     if combined_instructions:
         full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
-    # For up to 1 hour generation, we map minutes to word ranges in generate_script.
-    # We'll pass length_minutes to the function directly now.
     try:
-        script = generate_script(full_prompt, text, tone, f"{length_minutes} Mins")
     except Exception as e:
         return None, f"Error generating script: {str(e)}"
@@ -182,10 +212,12 @@ def generate_podcast(
     try:
         for item in script.dialogue:
             audio_file = generate_audio_mp3(item.text, item.speaker)
             seg = AudioSegment.from_file(audio_file, format="mp3")
             audio_segments.append(seg)
-            transcript += f"**{item.speaker}**: {item.text}\n\n"
             os.remove(audio_file)
         if not audio_segments:
@@ -195,7 +227,6 @@ def generate_podcast(
         for seg in audio_segments[1:]:
             combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
-        # Mix with bg music or user-provided custom music
         final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
@@ -211,7 +242,6 @@ def generate_podcast(
     except Exception as e:
         return None, f"Error generating audio: {str(e)}"
 def highlight_differences(original: str, edited: str) -> str:
     """
     Highlights the differences between the original and edited transcripts.
@@ -221,22 +251,17 @@ def highlight_differences(original: str, edited: str) -> str:
     highlighted = []
     for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
         if opcode == 'equal':
-            # Unchanged words
             highlighted.extend(original.split()[i1:i2])
         elif opcode in ('replace', 'insert'):
-            # Added or replaced words - highlight in red
             added_words = edited.split()[j1:j2]
             highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
         elif opcode == 'delete':
-            # Deleted words - optionally, can be shown differently
             pass
     return ' '.join(highlighted)
 def main():
     st.set_page_config(page_title="MyPod - AI-based Podcast Generator", layout="centered")
-    # Use smaller font for the main header
     st.markdown("## MyPod - AI powered Podcast Generator")
     st.markdown(
@@ -247,7 +272,6 @@ def main():
         "1. **Provide one source:** PDF Files, Website URL, YouTube link or a Topic to Research.\n"
         "2. **Choose the tone and the target duration.**\n"
         "3. **Click 'Generate Podcast'** to produce your podcast. After the audio is generated, you can edit the transcript and re-generate the audio with your edits if needed.\n\n"
-        "**Research a Topic:** If it's too niche or specific, you might not get the desired outcome.\n\n"
         "**Token Limit:** Up to ~2,048 tokens are supported. Long inputs may be truncated.\n"
         "**Note:** YouTube videos will only work if they have captions built in.\n\n"
         "⏳**Please be patient while your podcast is being generated.** This process involves content analysis, script creation, "
@@ -263,37 +287,25 @@ def main():
     with col2:
         research_topic_input = st.text_input("Or Research a Topic")
         tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
-        # (3) A slider for length (1 to 60 mins)
         length_minutes = st.slider("Podcast Length (in minutes)", 1, 60, 3)
-    # Additional user options for the new features:
     st.markdown("### Customize Your Podcast (New Features)")
-    # (1) Host/Guest customization
     with st.expander("Set Host & Guest Names/Descriptions (Optional)"):
         host_name = st.text_input("Host Name (leave blank for 'Jane')")
         host_desc = st.text_input("Host Description (Optional)")
         guest_name = st.text_input("Guest Name (leave blank for 'John')")
         guest_desc = st.text_input("Guest Description (Optional)")
-    # (2) User custom specs/prompts
     user_specs = st.text_area("Any special instructions or prompts for the script? (Optional)", "")
-    # (4) Sponsored content
     sponsor_content = st.text_area("Sponsored Content / Ad (Optional)", "")
-    # (5) Custom music upload
     custom_bg_music_file = st.file_uploader("Upload Custom Background Music (Optional)", type=["mp3", "wav"])
-    # We'll store the path to a temp file if the user uploads custom music
     custom_bg_music_path = None
     if custom_bg_music_file:
-        # Save to a temp file
         with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(custom_bg_music_file.name)[1]) as tmp:
             tmp.write(custom_bg_music_file.read())
             custom_bg_music_path = tmp.name
-    # Store results in session_state
     if "audio_bytes" not in st.session_state:
         st.session_state["audio_bytes"] = None
     if "transcript" not in st.session_state:
@@ -307,27 +319,26 @@ def main():
         progress_bar = st.progress(0)
         progress_text = st.empty()
-        progress_messages = [
             "🔍 Analyzing your input...",
             "📝 Crafting the perfect script...",
             "🎙️ Generating high-quality audio...",
             "🎶 Adding the finishing touches..."
         ]
-        # Incremental updates
-        progress_text.write(progress_messages[0])
         progress_bar.progress(0)
         time.sleep(1.0)
-        progress_text.write(progress_messages[1])
         progress_bar.progress(25)
         time.sleep(1.0)
-        progress_text.write(progress_messages[2])
         progress_bar.progress(50)
         time.sleep(1.0)
-        progress_text.write(progress_messages[3])
         progress_bar.progress(75)
         time.sleep(1.0)
@@ -371,22 +382,19 @@ def main():
         )
         st.markdown("### Generated Transcript (Editable)")
         edited_text = st.text_area(
             "Feel free to tweak lines, fix errors, or reword anything.",
             value=st.session_state["transcript"],
             height=300
         )
-        # Compute differences and highlight
         if st.session_state["transcript_original"]:
-            highlighted_transcript = highlight_differences(
                 st.session_state["transcript_original"],
                 edited_text
             )
             st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
-            st.markdown(highlighted_transcript, unsafe_allow_html=True)
         if st.button("Regenerate Audio From Edited Text"):
             regen_bar = st.progress(0)
@@ -400,7 +408,9 @@ def main():
             regen_bar.progress(50)
             time.sleep(1.0)
-            dialogue_items = parse_user_edited_transcript(edited_text)
             new_audio_bytes, new_transcript = regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path)
             regen_bar.progress(75)
@@ -428,58 +438,5 @@ def main():
                 st.markdown("### Updated Transcript")
                 st.markdown(new_transcript)
-# ---------------------------------------------------------------------
-# Overriding the local function to keep consistent usage:
-# ---------------------------------------------------------------------
-def mix_with_bg_music(spoken: AudioSegment, custom_bg_music_path=None) -> AudioSegment:
-    """
-    Mixes 'spoken' with bg_music.mp3 or a custom music file:
-    1) Start with 2 seconds of music alone before speech begins.
-    2) Loop the music if it's shorter than the final audio length.
-    3) Lower the music volume so the speech is clear.
-    """
-    if custom_bg_music_path:
-        music_path = custom_bg_music_path
-    else:
-        music_path = "bg_music.mp3"  # default in root folder
-    try:
-        bg_music = AudioSegment.from_file(music_path)
-    except Exception as e:
-        print("[ERROR] Failed to load background music:", e)
-        return spoken
-    bg_music = bg_music - 14.0  # Lower volume (e.g. -14 dB)
-    total_length_ms = len(spoken) + 2000
-    looped_music = AudioSegment.empty()
-    while len(looped_music) < total_length_ms:
-        looped_music += bg_music
-    looped_music = looped_music[:total_length_ms]
-    final_mix = looped_music.overlay(spoken, position=2000)
-    return final_mix
-def highlight_differences(original: str, edited: str) -> str:
-    """
-    Highlights the differences between the original and edited transcripts.
-    Added or modified words are wrapped in <span> tags with red color.
-    """
-    matcher = difflib.SequenceMatcher(None, original.split(), edited.split())
-    highlighted = []
-    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
-        if opcode == 'equal':
-            highlighted.extend(original.split()[i1:i2])
-        elif opcode in ('replace', 'insert'):
-            added_words = edited.split()[j1:j2]
-            highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
-        elif opcode == 'delete':
-            pass
-    return ' '.join(highlighted)
 if __name__ == "__main__":
     main()

     extract_text_from_url,
     transcribe_youtube_video,
     research_topic,
+    mix_with_bg_music,
+    DialogueItem  # so we can construct items
 )
 from prompts import SYSTEM_PROMPT
def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
    """
    Parse a user-edited transcript into a list of DialogueItem.

    Looks for lines like:
        **Angela**: Hello
        **Dimitris**: Great topic...
    'Angela' is kept as display_speaker and 'Hello' as text. The display
    name is then mapped to a TTS voice: speaker='Jane' if it matches
    host_name, speaker='John' if it matches guest_name, otherwise 'Jane'.
    Name matching ignores case and surrounding whitespace; when host and
    guest share a name, the host mapping wins.

    Args:
        edited_text: Transcript text as edited by the user.
        host_name: Display name of the host; None/blank falls back to 'Jane'.
        guest_name: Display name of the guest; None/blank falls back to 'John'.

    Returns:
        A list of DialogueItem. The list is empty when edited_text is blank.
        When no '**Name**: text' line is found, the whole text becomes a
        single item attributed to the host.
    """
    host_display = (host_name or "").strip() or "Jane"
    guest_display = (guest_name or "").strip() or "John"
    host_key = host_display.casefold()
    guest_key = guest_display.casefold()

    body = (edited_text or "").strip()
    if not body:
        # Nothing to speak; do not hand an empty string to the TTS step.
        return []

    # [ \t]* (not \s*) after the colon: \s also matches newlines, which let
    # an empty "**Name**:" line swallow the following speaker line as text.
    # [^*\n]+? keeps the name from spanning unrelated **bold** markup.
    pattern = r"\*\*([^*\n]+?)\*\*:[ \t]*(.+)"
    items = []
    for raw_name, raw_text in re.findall(pattern, body):
        text_line = raw_text.strip()
        if not text_line:
            continue
        display_name = raw_name.strip() or host_display
        # Host is checked first; unknown names default to the host voice.
        if display_name.casefold() == host_key:
            speaker = "Jane"
        elif display_name.casefold() == guest_key:
            speaker = "John"
        else:
            speaker = "Jane"
        items.append(
            DialogueItem(
                speaker=speaker,
                display_speaker=display_name,
                text=text_line,
            )
        )

    if not items:
        # No speaker lines found: treat the entire text as spoken by the
        # host, always with the host voice (same mapping the loop uses).
        items.append(
            DialogueItem(
                speaker="Jane",
                display_speaker=host_display,
                text=body,
            )
        )
    return items
 def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
     """
+    Re-generates multi-speaker audio from user-edited DialogueItems,
+    then mixes with background music (bg_music.mp3) or custom music.
+    Returns final audio bytes and updated transcript (using display_speaker).
     """
     audio_segments = []
     transcript = ""
     crossfade_duration = 50  # in ms
+    for item in dialogue_items:
+        audio_file = generate_audio_mp3(item.text, item.speaker)
         seg = AudioSegment.from_file(audio_file, format="mp3")
         audio_segments.append(seg)
+        # Use item.display_speaker for the text transcript
+        transcript += f"**{item.display_speaker}**: {item.text}\n\n"
         os.remove(audio_file)
     if not audio_segments:
     for seg in audio_segments[1:]:
         combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
     final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         final_mix.export(temp_audio.name, format="mp3")
         final_mp3_path = temp_audio.name
     return audio_bytes, transcript
 def generate_podcast(
     file,
     url,
 ):
     """
     Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
+    Uses female voice (Jane) for host, male voice (John) for guest.
+    Display_speaker is user-chosen name, speaker is "Jane" or "John".
+    Returns (audio_bytes, transcript_str).
+    """
     sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
     if sum(sources) > 1:
         return None, "Provide only one input (PDF, URL, YouTube, or Research topic)."
         try:
             if not file.name.lower().endswith('.pdf'):
                 return None, "Please upload a PDF file."
             reader = pypdf.PdfReader(file)
             text = " ".join(page.extract_text() for page in reader.pages if page.extract_text())
         except Exception as e:
     # Truncate if needed
     text = truncate_text(text)
+    # Build extra instructions
     extra_instructions = []
     if host_name or guest_name:
+        h = f"Host: {host_name or 'Jane'} - {host_desc or 'a curious host'}."
+        g = f"Guest: {guest_name or 'John'} - {guest_desc or 'an expert'}."
+        extra_instructions.append(f"{h}\n{g}")
     if user_specs.strip():
         extra_instructions.append(f"Additional User Instructions: {user_specs}")
     if sponsor_content.strip():
         extra_instructions.append(
             "Please include a short sponsored advertisement. The sponsor text is as follows:\n"
             + sponsor_content
         )
     combined_instructions = "\n\n".join(extra_instructions).strip()
     full_prompt = SYSTEM_PROMPT
     if combined_instructions:
         full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
+    # Use "generate_script" with host/guest name so it can do the mapping
     try:
+        script = generate_script(
+            full_prompt,
+            text,
+            tone,
+            f"{length_minutes} Mins",
+            host_name=host_name or "Jane",
+            guest_name=guest_name or "John"
+        )
     except Exception as e:
         return None, f"Error generating script: {str(e)}"
     try:
         for item in script.dialogue:
+            # item.speaker is guaranteed "Jane" or "John"
+            # item.display_speaker is the user-facing name
             audio_file = generate_audio_mp3(item.text, item.speaker)
             seg = AudioSegment.from_file(audio_file, format="mp3")
             audio_segments.append(seg)
+            transcript += f"**{item.display_speaker}**: {item.text}\n\n"
             os.remove(audio_file)
         if not audio_segments:
         for seg in audio_segments[1:]:
             combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
         final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
     except Exception as e:
         return None, f"Error generating audio: {str(e)}"
 def highlight_differences(original: str, edited: str) -> str:
     """
     Highlights the differences between the original and edited transcripts.
     highlighted = []
     for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
         if opcode == 'equal':
             highlighted.extend(original.split()[i1:i2])
         elif opcode in ('replace', 'insert'):
             added_words = edited.split()[j1:j2]
             highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
         elif opcode == 'delete':
             pass
     return ' '.join(highlighted)
 def main():
     st.set_page_config(page_title="MyPod - AI-based Podcast Generator", layout="centered")
     st.markdown("## MyPod - AI powered Podcast Generator")
     st.markdown(
         "1. **Provide one source:** PDF Files, Website URL, YouTube link or a Topic to Research.\n"
         "2. **Choose the tone and the target duration.**\n"
         "3. **Click 'Generate Podcast'** to produce your podcast. After the audio is generated, you can edit the transcript and re-generate the audio with your edits if needed.\n\n"
         "**Token Limit:** Up to ~2,048 tokens are supported. Long inputs may be truncated.\n"
         "**Note:** YouTube videos will only work if they have captions built in.\n\n"
         "⏳**Please be patient while your podcast is being generated.** This process involves content analysis, script creation, "
     with col2:
         research_topic_input = st.text_input("Or Research a Topic")
         tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
         length_minutes = st.slider("Podcast Length (in minutes)", 1, 60, 3)
     st.markdown("### Customize Your Podcast (New Features)")
     with st.expander("Set Host & Guest Names/Descriptions (Optional)"):
         host_name = st.text_input("Host Name (leave blank for 'Jane')")
         host_desc = st.text_input("Host Description (Optional)")
         guest_name = st.text_input("Guest Name (leave blank for 'John')")
         guest_desc = st.text_input("Guest Description (Optional)")
     user_specs = st.text_area("Any special instructions or prompts for the script? (Optional)", "")
     sponsor_content = st.text_area("Sponsored Content / Ad (Optional)", "")
     custom_bg_music_file = st.file_uploader("Upload Custom Background Music (Optional)", type=["mp3", "wav"])
     custom_bg_music_path = None
     if custom_bg_music_file:
         with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(custom_bg_music_file.name)[1]) as tmp:
             tmp.write(custom_bg_music_file.read())
             custom_bg_music_path = tmp.name
     if "audio_bytes" not in st.session_state:
         st.session_state["audio_bytes"] = None
     if "transcript" not in st.session_state:
         progress_bar = st.progress(0)
         progress_text = st.empty()
+        messages = [
             "🔍 Analyzing your input...",
             "📝 Crafting the perfect script...",
             "🎙️ Generating high-quality audio...",
             "🎶 Adding the finishing touches..."
         ]
+        progress_text.write(messages[0])
         progress_bar.progress(0)
         time.sleep(1.0)
+        progress_text.write(messages[1])
         progress_bar.progress(25)
         time.sleep(1.0)
+        progress_text.write(messages[2])
         progress_bar.progress(50)
         time.sleep(1.0)
+        progress_text.write(messages[3])
         progress_bar.progress(75)
         time.sleep(1.0)
         )
         st.markdown("### Generated Transcript (Editable)")
         edited_text = st.text_area(
             "Feel free to tweak lines, fix errors, or reword anything.",
             value=st.session_state["transcript"],
             height=300
         )
         if st.session_state["transcript_original"]:
+            highlighted = highlight_differences(
                 st.session_state["transcript_original"],
                 edited_text
             )
             st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
+            st.markdown(highlighted, unsafe_allow_html=True)
         if st.button("Regenerate Audio From Edited Text"):
             regen_bar = st.progress(0)
             regen_bar.progress(50)
             time.sleep(1.0)
+            # Parse lines, map to DialogueItem with correct TTS speaker
+            # host => female (Jane), guest => male (John)
+            dialogue_items = parse_user_edited_transcript(edited_text, host_name or "Jane", guest_name or "John")
             new_audio_bytes, new_transcript = regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path)
             regen_bar.progress(75)
                 st.markdown("### Updated Transcript")
                 st.markdown(new_transcript)
 if __name__ == "__main__":
     main()