awacke1 committed on
Commit
e71466c
·
verified ·
1 Parent(s): f5013c1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +384 -0
app.py ADDED
@@ -0,0 +1,384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import re
3
+ import os
4
+ import glob
5
+ import asyncio
6
+ import hashlib
7
+ import unicodedata
8
+ import streamlit as st
9
+ from PIL import Image
10
+ import fitz
11
+ import edge_tts
12
+ from reportlab.lib.pagesizes import A4
13
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
14
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
15
+ from reportlab.lib import colors
16
+ from reportlab.pdfbase import pdfmetrics
17
+ from reportlab.pdfbase.ttfonts import TTFont
18
+ from datetime import datetime
19
+ import pytz
20
+
21
# Page setup: wide layout, with the sidebar collapsed on first load.
st.set_page_config(
    initial_sidebar_state="collapsed",
    layout="wide",
)
22
+
23
def get_timestamp_prefix():
    """Return the current US/Central time as an upper-cased filename prefix.

    The value is formatted with ``%a %m%d %I%M%p`` (abbreviated weekday,
    month+day, 12-hour time with AM/PM) and then upper-cased.
    """
    central_tz = pytz.timezone("US/Central")
    stamp = datetime.now(central_tz).strftime("%a %m%d %I%M%p")
    return stamp.upper()
27
+
28
def clean_for_speech(text):
    """Return *text* with every ``#`` and every emoji character removed.

    Used to prepare Markdown for text-to-speech: heading markers and
    pictographs are dropped, all other characters (including whitespace)
    are left untouched.
    """
    emoji_run = re.compile(
        r"[\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+",
        flags=re.UNICODE,
    )
    without_hashes = text.replace("#", "")
    return emoji_run.sub('', without_hashes)
44
+
45
def trim_emojis_except_numbered(markdown_text):
    """Remove emojis from every line except numbered list items.

    The input is stripped of leading/trailing whitespace and split on
    newlines. A line that starts with ``<digits>. `` is kept verbatim;
    every other line has its emoji characters deleted. The lines are
    re-joined with newlines.
    """
    emoji_run = re.compile(
        r"[\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FAD0-\U0001FAD9"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+"
    )
    numbered = re.compile(r'^\d+\.\s')

    def scrub(row):
        # Numbered rows keep their emojis; all other rows lose them.
        if numbered.match(row):
            return row
        return emoji_run.sub('', row)

    rows = markdown_text.strip().split('\n')
    return '\n'.join(scrub(row) for row in rows)
73
+
74
async def generate_audio(text, voice, filename):
    """Synthesize *text* with the given edge-tts *voice* into *filename*.

    Returns *filename* once the save call has completed.
    """
    await edge_tts.Communicate(text, voice).save(filename)
    return filename
78
+
79
def detect_and_convert_links(text):
    """Convert Markdown links and bare URLs in *text* into ``<a href>`` tags.

    Two passes are made:

    1. ``[label](http(s)://url)`` becomes ``<a href="url">label</a>``.
    2. Bare ``http://``, ``https://`` and ``www.`` URLs that are *not*
       already part of an ``<a ...>...</a>`` element are wrapped in an
       anchor. ``www.`` URLs get an ``http://`` scheme in the href while
       the visible text is left as written.

    Existing anchors (including those produced by pass 1) are passed
    through untouched, so a link whose label is itself a URL is not
    wrapped a second time.

    Args:
        text: A single line (or any string) of Markdown/markup.

    Returns:
        The string with links converted to anchor markup.
    """
    url_pattern = re.compile(
        r'(https?://|www\.)[^\s\[\]()<>{}]+(\.[^\s\[\]()<>{}]+)+(/[^\s\[\]()<>{}]*)?',
        re.IGNORECASE
    )
    md_link_pattern = re.compile(r'\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)')
    # Capturing group so re.split keeps the anchors in the result list.
    anchor_pattern = re.compile(r'(<a\b[^>]*>.*?</a>)', re.IGNORECASE | re.DOTALL)

    def linkify(match):
        url = match.group(0)
        # The URL pattern is case-insensitive, so the scheme check must be too.
        href = 'http://' + url if url.lower().startswith('www.') else url
        return f'<a href="{href}">{url}</a>'

    text = md_link_pattern.sub(r'<a href="\2">\1</a>', text)

    # After splitting on a capturing pattern, odd indices hold complete
    # anchors (left as-is) and even indices hold the text between them.
    pieces = anchor_pattern.split(text)
    return ''.join(
        piece if idx % 2 else url_pattern.sub(linkify, piece)
        for idx, piece in enumerate(pieces)
    )
107
+
108
def apply_emoji_font(text, emoji_font):
    """Wrap runs of *text* in ``<font>`` tags, using *emoji_font* for emojis.

    Steps:

    * ``<a href="...">...</a>`` elements are swapped for ``###LINK_n###``
      placeholders so their contents are not re-wrapped.
    * ``<b>...</b>`` is swapped for ``###BOLD_START###``/``###BOLD_END###``.
    * Each emoji run is wrapped in ``<font face="{emoji_font}">`` (after
      NFC normalization); each non-empty run between emojis is wrapped in
      ``<font face="DejaVuSans">``.
    * Bold markers and link placeholders are then expanded back to markup,
      closing and reopening the surrounding ``<font>`` where needed.

    Returns the resulting markup string (empty input yields ``''``).
    """
    stashed_links = []

    def stash_link(found):
        stashed_links.append((found.group(1), found.group(2)))
        return f"###LINK_{len(stashed_links) - 1}###"

    def mark_bold(found):
        return f'###BOLD_START###{found.group(1)}###BOLD_END###'

    text = re.sub(r'<a\s+href="([^"]+)">(.*?)</a>', stash_link, text)
    text = re.sub(r'<b>(.*?)</b>', mark_bold, text)

    emoji_run = re.compile(
        r"([\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FAD0-\U0001FAD9"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+)"
    )

    # Splitting on a capturing pattern alternates plain text (even index)
    # with emoji runs (odd index); empty plain chunks are dropped.
    pieces = []
    for position, chunk in enumerate(emoji_run.split(text)):
        if not chunk:
            continue
        if position % 2:
            glyphs = unicodedata.normalize('NFC', chunk)
            pieces.append(f'<font face="{emoji_font}">{glyphs}</font>')
        else:
            pieces.append(f'<font face="DejaVuSans">{chunk}</font>')

    combined = ''.join(pieces)
    combined = combined.replace('###BOLD_START###', '</font><b><font face="DejaVuSans">')
    combined = combined.replace('###BOLD_END###', '</font></b><font face="DejaVuSans">')

    for idx, (url, label) in enumerate(stashed_links):
        marker = f"###LINK_{idx}###"
        # Only restore a placeholder that occurs exactly once.
        if combined.count(marker) != 1:
            continue
        before, _, after = combined.partition(marker)
        anchor = f'<a href="{url}">{label}</a>'
        # If a <font> is still open at this point, close it around the link.
        if before.rfind('<font') > before.rfind('</font>'):
            anchor = f'</font>{anchor}<font face="DejaVuSans">'
        combined = before + anchor + after
    return combined
160
+
161
def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers):
    """Turn Markdown text into a list of markup lines for the PDF.

    Blank lines and lines starting with ``# `` are skipped. Each remaining
    line has its links converted by ``detect_and_convert_links``;
    ``**text**`` becomes ``<b>text</b>`` when *render_with_bold* is set;
    and, when *auto_bold_numbers* is set, a line starting with
    ``<digits>. `` is wrapped as a whole in a single ``<b>...</b>``.

    Returns:
        A ``(lines, count)`` tuple where ``count == len(lines)``.
    """
    numbered = re.compile(r'^\d+\.\s')
    rendered = []
    for raw in markdown_text.strip().split('\n'):
        entry = raw.strip()
        if entry == '' or entry.startswith('# '):
            continue
        entry = detect_and_convert_links(entry)
        if render_with_bold:
            entry = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', entry)
        if auto_bold_numbers and numbered.match(entry):
            fully_wrapped = entry.startswith("<b>") and entry.endswith("</b>")
            if not fully_wrapped:
                # Drop inner bold tags first so the line carries one <b> pair.
                if "<b>" in entry and "</b>" in entry:
                    entry = re.sub(r'</?b>', '', entry)
                entry = f"<b>{entry}</b>"
        rendered.append(entry)
    return rendered, len(rendered)
182
+
183
def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns):
    """Render Markdown into a multi-column PDF and return its bytes.

    The page is twice the A4 width by the A4 height with 36pt margins. The
    content lines are split evenly across the columns and laid out in a
    single table; the font size is derived from the number of lines and
    the average line length, clamped to 6-16pt.

    Args:
        markdown_text: Markdown source to render.
        base_font_size: Accepted for interface compatibility; the font
            size is computed from the content and this value is not used.
        render_with_bold: Convert ``**text**`` to bold markup.
        auto_bold_numbers: Bold lines that start with ``<digits>. ``.
        enlarge_numbered: Use a font 1pt larger for numbered bold lines.
        num_columns: Number of columns, or ``0`` to choose automatically
            (one column per ~20 lines, between 1 and 6).

    Returns:
        The PDF as ``bytes``, or ``None`` if no ``.ttf`` file is present
        or font registration fails (an error is shown via ``st.error``).
    """
    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
    styles = getSampleStyleSheet()
    spacer_height = 10
    pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers)

    emoji_font_name = "NotoEmoji-Bold"
    try:
        available_font_files = glob.glob("*.ttf")
        if not available_font_files:
            st.error("No .ttf font files found.")
            return None
        selected_font_path = next((f for f in available_font_files if "NotoEmoji-Bold" in f), None)
        if selected_font_path:
            pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", selected_font_path))
        else:
            # Without the emoji font file, fall back to the text font rather
            # than referencing a font name that was never registered.
            emoji_font_name = "DejaVuSans"
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return None

    # --- Layout heuristics -------------------------------------------------
    total_chars = sum(len(line) for line in pdf_content)
    usable_height = page_height - 72 - spacer_height
    usable_width = page_width - 72
    avg_line_chars = total_chars / total_lines if total_lines > 0 else 50
    ideal_lines_per_col = 20
    suggested_columns = max(1, min(6, int(total_lines / ideal_lines_per_col) + 1))
    num_columns = num_columns if num_columns != 0 else suggested_columns
    col_width = usable_width / num_columns
    min_font_size = 6
    max_font_size = 16
    lines_per_column = total_lines / num_columns if num_columns > 0 else total_lines
    target_height_per_line = usable_height / lines_per_column if lines_per_column > 0 else usable_height
    estimated_font_size = int(target_height_per_line / 1.5)
    adjusted_font_size = max(min_font_size, min(max_font_size, estimated_font_size))
    # Shrink further when the average line would be too wide for a column.
    if avg_line_chars > col_width / adjusted_font_size * 10:
        adjusted_font_size = int(col_width / (avg_line_chars / 10))
        adjusted_font_size = max(min_font_size, adjusted_font_size)

    # --- Paragraph styles --------------------------------------------------
    item_style = ParagraphStyle(
        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
        fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
        linkUnderline=True
    )
    numbered_font_size = adjusted_font_size + 1 if enlarge_numbered else adjusted_font_size
    numbered_bold_style = ParagraphStyle(
        'NumberedBoldStyle', parent=styles['Normal'], fontName=emoji_font_name,
        fontSize=numbered_font_size,
        leading=numbered_font_size * 1.15, spaceAfter=1,
        linkUnderline=True
    )
    section_style = ParagraphStyle(
        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
        textColor=colors.darkblue, fontSize=adjusted_font_size * 1.1, leading=adjusted_font_size * 1.32, spaceAfter=2,
        linkUnderline=True
    )

    # --- Distribute lines across columns -----------------------------------
    columns = [[] for _ in range(num_columns)]
    current_line_count = 0
    current_column = 0
    number_pattern = re.compile(r'^\d+\.\s')
    for item in pdf_content:
        if current_line_count >= lines_per_column and current_column < num_columns - 1:
            current_column += 1
            current_line_count = 0
        columns[current_column].append(item)
        current_line_count += 1

    column_cells = [[] for _ in range(num_columns)]
    for col_idx, column in enumerate(columns):
        for item in column:
            if isinstance(item, str) and item.startswith("<b>") and item.endswith("</b>"):
                content = item[3:-4].strip()
                style = numbered_bold_style if number_pattern.match(content) else section_style
                column_cells[col_idx].append(Paragraph(apply_emoji_font(content, emoji_font_name), style))
            else:
                column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))

    # Pad every column to the same height. Always keep at least one row:
    # ReportLab's Table rejects empty data by default, which would otherwise
    # make empty Markdown input raise. Each padding cell gets its own
    # Paragraph instead of sharing one flowable across cells.
    row_count = max(1, max((len(cells) for cells in column_cells), default=0))
    for cells in column_cells:
        cells.extend(Paragraph("", item_style) for _ in range(row_count - len(cells)))

    table_data = list(zip(*column_cells))
    table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0, colors.white),
        ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
        ('LEFTPADDING', (0, 0), (-1, -1), 2),
        ('RIGHTPADDING', (0, 0), (-1, -1), 2),
        ('TOPPADDING', (0, 0), (-1, -1), 1),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
    ]))
    story = [Spacer(1, spacer_height), table]
    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()
280
+
281
def pdf_to_image(pdf_bytes):
    """Rasterize every page of a PDF into a PIL image at 2x zoom.

    Args:
        pdf_bytes: The PDF document as bytes.

    Returns:
        A list with one RGB ``PIL.Image`` per page, or ``None`` if opening
        or rendering fails (the error is reported with ``st.error``).
    """
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            zoom = fitz.Matrix(2.0, 2.0)
            images = []
            for page in doc:
                pix = page.get_pixmap(matrix=zoom)
                images.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
            return images
        finally:
            # Close the document even when a page fails to render.
            doc.close()
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
294
+
295
# ---------------------------------------------------------------------------
# Streamlit page script: sidebar controls, Markdown editor, TTS and PDF preview.
#
# NOTE(review): the source this was recovered from had lost its indentation.
# The nesting below (editor, buttons and TTS controls inside the sidebar) is a
# reconstruction - confirm it against the deployed app.py.
# NOTE(review): the emoji button labels were mis-encoded in the recovered
# source and have been restored; the memo emoji in "Save Markdown" was
# inferred from a truncated sequence - confirm.
# ---------------------------------------------------------------------------

# Every Markdown file in the working directory except README.md is selectable.
md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]

with st.sidebar:
    st.markdown("### PDF Options")
    if md_options:
        selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
    else:
        st.warning("No markdown file found. Please add one to your folder.")
        selected_md = None
        st.session_state.markdown_content = ""

    # Map font display name -> .ttf path for the font picker.
    available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
    font_names = list(available_font_files.keys())
    selected_font_name = st.selectbox(
        "Select Emoji Font",
        options=font_names,
        index=font_names.index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0,
    )
    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
    render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
    auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
    enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
    column_options = ["Auto"] + list(range(1, 7))
    num_columns = st.selectbox("Number of Columns", options=column_options, index=0)
    # 0 tells create_pdf to pick the column count itself.
    num_columns = 0 if num_columns == "Auto" else int(num_columns)
    st.info("Font size and columns adjust to fit one page.")

    # The widget key changes with file/font/columns so the editor reloads
    # its content whenever one of those selections changes.
    edited_markdown = st.text_area(
        "Input Markdown",
        value=st.session_state.markdown_content,
        height=300,
        key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}",
    )

    col1, col2 = st.columns(2)
    with col1:
        if st.button("🔄📄 Update PDF"):
            st.session_state.markdown_content = edited_markdown
            if selected_md:
                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                    f.write(edited_markdown)
            st.rerun()

    with col2:
        if st.button("✂️ Trim Emojis"):
            trimmed_content = trim_emojis_except_numbered(edited_markdown)
            st.session_state.markdown_content = trimmed_content
            if selected_md:
                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                    f.write(trimmed_content)
            st.rerun()

    prefix = get_timestamp_prefix()
    st.download_button(
        label="💾📝 Save Markdown",
        data=st.session_state.markdown_content,
        file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
        mime="text/markdown"
    )

    st.markdown("### Text-to-Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("Generate Audio"):
        cleaned_text = clean_for_speech(st.session_state.markdown_content)
        audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
        audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
        st.audio(audio_file)
        with open(audio_file, "rb") as f:
            audio_bytes = f.read()
        st.download_button(
            label="💾🔊 Save Audio",
            data=audio_bytes,
            file_name=audio_filename,
            mime="audio/mpeg"
        )

with st.spinner("Generating PDF..."):
    pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns)

with st.container():
    # create_pdf returns None when fonts are unavailable; skip the preview then.
    pdf_images = pdf_to_image(pdf_bytes) if pdf_bytes else None
    if pdf_images:
        for img in pdf_images:
            st.image(img, use_container_width=True)
    else:
        st.info("Download the PDF to view it locally.")

with st.sidebar:
    # Only offer the download when a PDF was actually produced.
    if pdf_bytes:
        st.download_button(
            label="💾📄 Save PDF",
            data=pdf_bytes,
            file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
            mime="application/pdf"
        )