Spaces:

awacke1
/

PDF-Paper-Maker-AI-UI-UX

Running

App Files Files Community

awacke1 committed 24 days ago

Commit

01216ba

verified ·

1 Parent(s): 646f19c

Update backup7.app.py

Browse files

Files changed (1) hide show

backup7.app.py +157 -6

backup7.app.py CHANGED Viewed

@@ -20,13 +20,17 @@ import pytz
 st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
 def get_timestamp_prefix():
     central = pytz.timezone("US/Central")
     now = datetime.now(central)
     # Format: three-letter day, MMDD, HHMM + AM/PM (all uppercase)
     return now.strftime("%a %m%d %I%M%p").upper()
 def clean_for_speech(text):
     # Remove hash marks
     text = text.replace("#", "")
     # Remove emojis using a regex pattern that covers a wide range
@@ -45,12 +49,86 @@ def clean_for_speech(text):
     text = emoji_pattern.sub('', text)
     return text
 async def generate_audio(text, voice, filename):
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(filename)
     return filename
 def apply_emoji_font(text, emoji_font):
     emoji_pattern = re.compile(
         r"([\U0001F300-\U0001F5FF"
         r"\U0001F600-\U0001F64F"
@@ -65,40 +143,89 @@ def apply_emoji_font(text, emoji_font):
         r"\u2600-\u26FF"
         r"\u2700-\u27BF]+)"
     )
     def replace_emoji(match):
         emoji = match.group(1)
         emoji = unicodedata.normalize('NFC', emoji)
         return f'<font face="{emoji_font}">{emoji}</font>'
     segments = []
     last_pos = 0
     for match in emoji_pattern.finditer(text):
         start, end = match.span()
         if last_pos < start:
             segments.append(f'<font face="DejaVuSans">{text[last_pos:start]}</font>')
         segments.append(replace_emoji(match))
         last_pos = end
     if last_pos < len(text):
         segments.append(f'<font face="DejaVuSans">{text[last_pos:]}</font>')
-    return ''.join(segments)
 def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers):
     lines = markdown_text.strip().split('\n')
     pdf_content = []
     number_pattern = re.compile(r'^\d+\.\s')
     for line in lines:
         line = line.strip()
         if not line or line.startswith('# '):
             continue
         if render_with_bold:
             line = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line)
         if auto_bold_numbers and number_pattern.match(line):
             if not (line.startswith("<b>") and line.endswith("</b>")):
-                line = f"<b>{line}</b>"
         pdf_content.append(line)
     total_lines = len(pdf_content)
     return pdf_content, total_lines
 def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns):
     buffer = io.BytesIO()
     page_width = A4[0] * 2
     page_height = A4[1]
@@ -106,19 +233,26 @@ def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_number
     styles = getSampleStyleSheet()
     spacer_height = 10
     pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers)
     item_style = ParagraphStyle(
         'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
-        fontSize=base_font_size, leading=base_font_size * 1.15, spaceAfter=1
     )
     numbered_bold_style = ParagraphStyle(
         'NumberedBoldStyle', parent=styles['Normal'], fontName="NotoEmoji-Bold",
         fontSize=base_font_size + 1 if enlarge_numbered else base_font_size,
-        leading=(base_font_size + 1) * 1.15 if enlarge_numbered else base_font_size * 1.15, spaceAfter=1
     )
     section_style = ParagraphStyle(
         'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
-        textColor=colors.darkblue, fontSize=base_font_size * 1.1, leading=base_font_size * 1.32, spaceAfter=2
     )
     try:
         available_font_files = glob.glob("*.ttf")
         if not available_font_files:
@@ -135,17 +269,22 @@ def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_number
     except Exception as e:
         st.error(f"Font registration error: {e}")
         return
     columns = [[] for _ in range(num_columns)]
     lines_per_column = total_lines / num_columns if num_columns > 0 else total_lines
     current_line_count = 0
     current_column = 0
     number_pattern = re.compile(r'^\d+\.\s')
     for item in pdf_content:
         if current_line_count >= lines_per_column and current_column < num_columns - 1:
             current_column += 1
             current_line_count = 0
         columns[current_column].append(item)
         current_line_count += 1
     column_cells = [[] for _ in range(num_columns)]
     for col_idx, column in enumerate(columns):
         for item in column:
@@ -157,9 +296,13 @@ def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_number
                     column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), section_style))
             else:
                 column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))
     max_cells = max(len(cells) for cells in column_cells) if column_cells else 0
     for cells in column_cells:
         cells.extend([Paragraph("", item_style)] * (max_cells - len(cells)))
     col_width = (page_width - 72) / num_columns if num_columns > 0 else page_width - 72
     table_data = list(zip(*column_cells)) if column_cells else [[]]
     table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
@@ -174,12 +317,16 @@ def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_number
         ('TOPPADDING', (0, 0), (-1, -1), 1),
         ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
     ]))
     story = [Spacer(1, spacer_height), table]
     doc.build(story)
     buffer.seek(0)
     return buffer.getvalue()
 def pdf_to_image(pdf_bytes):
     try:
         doc = fitz.open(stream=pdf_bytes, filetype="pdf")
         images = []
@@ -197,6 +344,7 @@ def pdf_to_image(pdf_bytes):
 md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
 md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]
 with st.sidebar:
     st.markdown("### PDF Options")
     if md_options:
@@ -256,9 +404,11 @@ with st.sidebar:
             mime="audio/mpeg"
         )
 with st.spinner("Generating PDF..."):
     pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns)
 with st.container():
     pdf_images = pdf_to_image(pdf_bytes)
     if pdf_images:
@@ -267,10 +417,11 @@ with st.container():
     else:
         st.info("Download the PDF to view it locally.")
 with st.sidebar:
     st.download_button(
         label="💾📄 Save PDF",
         data=pdf_bytes,
         file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
         mime="application/pdf"
-    )

 st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
+# 🕒 Time flies when you're having function calls
 def get_timestamp_prefix():
     """Return the current US/Central time as an uppercase filename prefix.

     The layout is abbreviated weekday, month+day, then 12-hour clock time
     with the AM/PM marker, e.g. ``MON 0115 0930AM``.
     """
     central_now = datetime.now(pytz.timezone("US/Central"))
     # Pieces: abbreviated weekday, MMDD, HHMM followed by AM/PM.
     stamp = central_now.strftime("%a %m%d %I%M%p")
     return stamp.upper()
+# 🧹 Because text needs a bath before being spoken
 def clean_for_speech(text):
+    """🧼 Scrubs your text cleaner than your bathroom will ever be"""
     # Remove hash marks
     text = text.replace("#", "")
     # Remove emojis using a regex pattern that covers a wide range
     text = emoji_pattern.sub('', text)
     return text
+# 🎤 Making robots talk so you don't have to
 async def generate_audio(text, voice, filename):
     """Hand ``text`` and ``voice`` to edge_tts and save the result.

     Awaits ``edge_tts.Communicate(text, voice).save(filename)`` and returns
     ``filename`` unchanged so callers can chain on the output path.
     """
     await edge_tts.Communicate(text, voice).save(filename)
     return filename
+# 🔗 Detecting links like a digital bloodhound
def detect_and_convert_links(text):
    """Convert markdown links and bare URLs in ``text`` into ``<a href>`` tags.

    Two passes are applied:

    1. Markdown links of the form ``[label](http(s)://url)`` become
       ``<a href="url">label</a>``.
    2. Bare ``http://``, ``https://`` and ``www.`` URLs that are not already
       part of an ``<a>`` element are wrapped in an anchor. ``www.`` URLs get
       an ``http://`` scheme in the ``href`` while the visible label keeps the
       text as written.

    Existing anchors (opening tag, label and closing tag) are copied through
    untouched, so a URL that appears in an ``href`` attribute or anywhere in
    an anchor's label is never wrapped a second time. Trailing sentence
    punctuation or a closing quote directly after a bare URL is kept outside
    the generated link.

    Args:
        text: Source string, possibly containing markdown links, bare URLs
            and already-converted ``<a>`` tags.

    Returns:
        The string with links converted; an empty string stays empty.
    """
    # Markdown links: [label](http://... or https://...)
    md_link_pattern = re.compile(r'\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)')

    # One left-to-right scan that sees either a whole anchor element or a
    # bare URL. Matching the anchor first consumes everything inside it, which
    # is what keeps URLs in hrefs/labels from being linked again. The closing
    # tag is optional so an unterminated "<a ...>" still shields its own
    # attributes.
    scan_pattern = re.compile(
        r'(?P<anchor><a\b[^>]*>(?:.*?</a>)?)'
        r'|(?P<url>(?:https?://|www\.)[^\s\[\]()<>{}]+'
        r'(?:\.[^\s\[\]()<>{}]+)+(?:/[^\s\[\]()<>{}]*)?)',
        re.IGNORECASE | re.DOTALL,
    )

    # Characters that end a sentence or quotation rather than a URL.
    trailing_punctuation = '.,;:!?\'"'

    def link_or_keep(match):
        anchor = match.group('anchor')
        if anchor is not None:
            return anchor
        raw = match.group('url')
        url = raw.rstrip(trailing_punctuation)
        tail = raw[len(url):]
        # The pattern is case-insensitive, so the prefix test must be too.
        href = 'http://' + url if url.lower().startswith('www.') else url
        return f'<a href="{href}">{url}</a>{tail}'

    text = md_link_pattern.sub(r'<a href="\2">\1</a>', text)
    return scan_pattern.sub(link_or_keep, text)
+# 🎭 Making emojis wear the right font costume
 def apply_emoji_font(text, emoji_font):
+    """🦄 Because emojis deserve their own font fashion show"""
+    # First handle links - temporarily replace them with placeholders
+    link_pattern = re.compile(r'<a\s+href="([^"]+)">(.*?)</a>')
+    links = []
+    def save_link(match):
+        link_idx = len(links)
+        links.append((match.group(1), match.group(2)))
+        return f"###LINK_{link_idx}###"
+    text = link_pattern.sub(save_link, text)
+    # Now handle bold formatting
+    text = re.sub(r'<b>(.*?)</b>', lambda m: f'###BOLD_START###{m.group(1)}###BOLD_END###', text)
+    # Apply emoji font replacement
     emoji_pattern = re.compile(
         r"([\U0001F300-\U0001F5FF"
         r"\U0001F600-\U0001F64F"
         r"\u2600-\u26FF"
         r"\u2700-\u27BF]+)"
     )
     def replace_emoji(match):
         emoji = match.group(1)
         emoji = unicodedata.normalize('NFC', emoji)
         return f'<font face="{emoji_font}">{emoji}</font>'
     segments = []
     last_pos = 0
     for match in emoji_pattern.finditer(text):
         start, end = match.span()
         if last_pos < start:
             segments.append(f'<font face="DejaVuSans">{text[last_pos:start]}</font>')
         segments.append(replace_emoji(match))
         last_pos = end
     if last_pos < len(text):
         segments.append(f'<font face="DejaVuSans">{text[last_pos:]}</font>')
+    combined_text = ''.join(segments)
+    # Restore bold tags
+    combined_text = combined_text.replace('###BOLD_START###', '</font><b><font face="DejaVuSans">')
+    combined_text = combined_text.replace('###BOLD_END###', '</font></b><font face="DejaVuSans">')
+    # Restore links
+    for i, (url, label) in enumerate(links):
+        placeholder = f"###LINK_{i}###"
+        if placeholder in combined_text:
+            # If the link is within a font tag, we need to close and reopen it
+            parts = combined_text.split(placeholder)
+            if len(parts) == 2:
+                before, after = parts
+                # Check if we're inside a font tag
+                if before.rfind('<font') > before.rfind('</font>'):
+                    # Close font tag before link, reopen after
+                    link_html = f'</font><a href="{url}">{label}</a><font face="DejaVuSans">'
+                    combined_text = before + link_html + after
+                else:
+                    # Simple replacement
+                    combined_text = before + f'<a href="{url}">{label}</a>' + after
+    return combined_text
+# 📝 Converting markdown to PDF content, because PDFs never go out of style
 def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers):
     """Turn markdown text into a list of markup lines for the PDF builder.

     Blank lines and lines starting with ``"# "`` are dropped. Every kept line
     is passed through ``detect_and_convert_links``; ``**bold**`` spans become
     ``<b>`` tags when ``render_with_bold`` is set; and lines that start with
     ``<digits>. `` are wrapped in ``<b>`` when ``auto_bold_numbers`` is set.

     Returns:
         A ``(lines, count)`` tuple where ``count == len(lines)``.
     """
     numbered_item = re.compile(r'^\d+\.\s')
     rendered = []
     for raw_line in markdown_text.strip().split('\n'):
         entry = raw_line.strip()
         if entry == '' or entry.startswith('# '):
             continue
         # Links are converted first so later markup sees the anchor tags.
         entry = detect_and_convert_links(entry)
         if render_with_bold:
             entry = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', entry)
         if auto_bold_numbers and numbered_item.match(entry):
             fully_bold = entry.startswith("<b>") and entry.endswith("</b>")
             if not fully_bold:
                 # Partial bold inside the line: flatten it so the single
                 # outer <b> wrapper does not end up with nested bold tags.
                 if "<b>" in entry and "</b>" in entry:
                     entry = re.sub(r'</?b>', '', entry)
                 entry = f"<b>{entry}</b>"
         rendered.append(entry)
     return rendered, len(rendered)
+# 🏗️ Building PDFs like it's your second job
 def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns):
+    """🔨 Constructs a PDF with the precision of a sleep-deprived architect"""
     buffer = io.BytesIO()
     page_width = A4[0] * 2
     page_height = A4[1]
     styles = getSampleStyleSheet()
     spacer_height = 10
     pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers)
+    # Define styles for different text types
     item_style = ParagraphStyle(
         'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
+        fontSize=base_font_size, leading=base_font_size * 1.15, spaceAfter=1,
+        linkUnderline=True  # Enable underline for links
     )
     numbered_bold_style = ParagraphStyle(
         'NumberedBoldStyle', parent=styles['Normal'], fontName="NotoEmoji-Bold",
         fontSize=base_font_size + 1 if enlarge_numbered else base_font_size,
+        leading=(base_font_size + 1) * 1.15 if enlarge_numbered else base_font_size * 1.15, spaceAfter=1,
+        linkUnderline=True  # Enable underline for links
     )
     section_style = ParagraphStyle(
         'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
+        textColor=colors.darkblue, fontSize=base_font_size * 1.1, leading=base_font_size * 1.32, spaceAfter=2,
+        linkUnderline=True  # Enable underline for links
     )
+    # Register fonts
     try:
         available_font_files = glob.glob("*.ttf")
         if not available_font_files:
     except Exception as e:
         st.error(f"Font registration error: {e}")
         return
+    # Distribute content across columns
     columns = [[] for _ in range(num_columns)]
     lines_per_column = total_lines / num_columns if num_columns > 0 else total_lines
     current_line_count = 0
     current_column = 0
     number_pattern = re.compile(r'^\d+\.\s')
     for item in pdf_content:
         if current_line_count >= lines_per_column and current_column < num_columns - 1:
             current_column += 1
             current_line_count = 0
         columns[current_column].append(item)
         current_line_count += 1
+    # Format columns into Paragraph objects
     column_cells = [[] for _ in range(num_columns)]
     for col_idx, column in enumerate(columns):
         for item in column:
                     column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), section_style))
             else:
                 column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))
+    # Ensure columns have the same number of cells
     max_cells = max(len(cells) for cells in column_cells) if column_cells else 0
     for cells in column_cells:
         cells.extend([Paragraph("", item_style)] * (max_cells - len(cells)))
+    # Create the table layout
     col_width = (page_width - 72) / num_columns if num_columns > 0 else page_width - 72
     table_data = list(zip(*column_cells)) if column_cells else [[]]
     table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
         ('TOPPADDING', (0, 0), (-1, -1), 1),
         ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
     ]))
+    # Build the PDF
     story = [Spacer(1, spacer_height), table]
     doc.build(story)
     buffer.seek(0)
     return buffer.getvalue()
+# 🖼️ Converting PDFs to images, because we can't leave well enough alone
 def pdf_to_image(pdf_bytes):
+    """🔎 Turns your PDF into pictures because some people just want to see the world rendered"""
     try:
         doc = fitz.open(stream=pdf_bytes, filetype="pdf")
         images = []
 md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
 md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]
+# 🎪 The main Streamlit show begins here
 with st.sidebar:
     st.markdown("### PDF Options")
     if md_options:
             mime="audio/mpeg"
         )
+# 🚀 Generating the PDF with more complexity than a rocket launch
 with st.spinner("Generating PDF..."):
     pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns)
+# 📺 Displaying the preview, because everyone loves to window shop
 with st.container():
     pdf_images = pdf_to_image(pdf_bytes)
     if pdf_images:
     else:
         st.info("Download the PDF to view it locally.")
+# 💾 Last chance to save your masterpiece before it's gone forever
 with st.sidebar:
     st.download_button(
         label="💾📄 Save PDF",
         data=pdf_bytes,
         file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
         mime="application/pdf"
+    )