awacke1 committed on
Commit
f58a6a4
·
verified ·
1 Parent(s): cda2207

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +276 -0
app.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import re
3
+ import os
4
+ import glob
5
+ import asyncio
6
+ import hashlib
7
+ import unicodedata
8
+ import streamlit as st
9
+ from PIL import Image
10
+ import fitz
11
+ import edge_tts
12
+ from reportlab.lib.pagesizes import A4
13
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
14
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
15
+ from reportlab.lib import colors
16
+ from reportlab.pdfbase import pdfmetrics
17
+ from reportlab.pdfbase.ttfonts import TTFont
18
+ from datetime import datetime
19
+ import pytz
20
+
21
# Use the wide page layout and start with the sidebar collapsed.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="collapsed",
)
22
+
23
def get_timestamp_prefix():
    """Return the current US/Central time as an uppercase file-name prefix.

    The layout is abbreviated weekday, month and day as ``MMDD``, then the
    12-hour clock as ``HHMM`` followed by AM/PM, for example
    ``"MON 0310 0230PM"``.
    """
    central_now = datetime.now(pytz.timezone("US/Central"))
    stamp = central_now.strftime("%a %m%d %I%M%p")
    return stamp.upper()
28
+
29
def clean_for_speech(text):
    """Return *text* with hash marks and emoji/symbol characters removed.

    Every ``#`` is dropped first; afterwards each run of characters that
    falls in one of the code-point ranges below is deleted. Everything else
    (including the whitespace around removed characters) is left as is.
    """
    emoji_ranges = (
        r"[\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+"
    )
    without_hashes = "".join(text.split("#"))
    return re.sub(emoji_ranges, '', without_hashes, flags=re.UNICODE)
47
+
48
async def generate_audio(text, voice, filename):
    """Synthesize *text* with the edge-tts voice *voice* and save it to *filename*.

    Returns *filename* after the save call has completed.
    """
    await edge_tts.Communicate(text, voice).save(filename)
    return filename
52
+
53
def apply_emoji_font(text, emoji_font):
    """Wrap *text* in ``<font>`` tags, giving emoji runs their own face.

    Each run of characters in the emoji ranges below is NFC-normalized and
    wrapped in ``<font face="{emoji_font}">``; every non-empty stretch of
    text between those runs is wrapped in ``<font face="DejaVuSans">``.
    An empty *text* yields an empty string.
    """
    emoji_run = re.compile(
        r"([\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FAD0-\U0001FAD9"  # additional range if needed
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+)"
    )
    # Splitting on a pattern with one capturing group alternates plain text
    # (even positions) with the captured emoji runs (odd positions).
    pieces = emoji_run.split(text)
    markup = []
    for position, piece in enumerate(pieces):
        if position % 2:
            glyphs = unicodedata.normalize('NFC', piece)
            markup.append(f'<font face="{emoji_font}">{glyphs}</font>')
        elif piece:
            markup.append(f'<font face="DejaVuSans">{piece}</font>')
    return ''.join(markup)
83
+
84
def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers):
    """Turn markdown text into a list of markup lines for the PDF.

    Blank lines and lines starting with ``"# "`` are skipped. When
    *render_with_bold* is true, ``**text**`` becomes ``<b>text</b>``. When
    *auto_bold_numbers* is true, a line that starts with ``N. `` is wrapped
    in ``<b>…</b>`` unless it already starts and ends with those tags.

    Returns a ``(lines, count)`` tuple where ``count == len(lines)``.
    """
    numbered = re.compile(r'^\d+\.\s')
    prepared = []
    for raw in markdown_text.strip().split('\n'):
        entry = raw.strip()
        if entry == '' or entry.startswith('# '):
            continue
        if render_with_bold:
            entry = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', entry)
        already_wrapped = entry.startswith("<b>") and entry.endswith("</b>")
        if auto_bold_numbers and numbered.match(entry) and not already_wrapped:
            entry = f"<b>{entry}</b>"
        prepared.append(entry)
    return prepared, len(prepared)
100
+
101
def _strip_outer_bold(item):
    """Return the text inside a single enclosing ``<b>…</b>`` pair, else ``None``."""
    if not (isinstance(item, str) and item.startswith("<b>") and item.endswith("</b>")):
        return None
    inner = item[3:-4]
    depth = 0
    for tag in re.findall(r"</?b>", inner):
        depth += 1 if tag == "<b>" else -1
        if depth < 0:
            # The leading <b> was closed mid-line (e.g. "<b>a</b> and <b>b</b>"),
            # so the first and last tags are not one wrapper around the line.
            return None
    return inner.strip() if depth == 0 else None


def _register_pdf_fonts():
    """Register the TTF faces used by the PDF; return the emoji face name or ``None`` on failure."""
    try:
        font_files = glob.glob("*.ttf")
        if not font_files:
            st.error("No .ttf font files found in the current directory.")
            return None
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
        emoji_path = next((f for f in font_files if "NotoEmoji-Bold" in f), None)
        if emoji_path is None:
            # Referencing an unregistered face would fail while building the
            # document, so fall back to the face that is known to be registered.
            st.warning("NotoEmoji-Bold font file not found; using DejaVuSans for bold lines.")
            return "DejaVuSans"
        pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", emoji_path))
        return "NotoEmoji-Bold"
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return None


def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns):
    """Render markdown text as a multi-column PDF and return it as bytes.

    The page is twice the width of A4 at A4 height with 36-point margins.
    Lines produced by ``markdown_to_pdf_content`` are distributed over
    *num_columns* columns of a single table, filling each column in turn.

    Args:
        markdown_text: Markdown source to lay out.
        base_font_size: Font size in points for ordinary lines.
        render_with_bold: Forwarded to ``markdown_to_pdf_content``.
        auto_bold_numbers: Forwarded to ``markdown_to_pdf_content``.
        enlarge_numbered: When true, bold numbered lines are one point larger.
        num_columns: Number of columns; values below 1 are treated as 1.

    Returns:
        The PDF as ``bytes``, or ``None`` when font registration fails (an
        error is shown through ``st.error`` in that case).
    """
    # A non-positive column count would otherwise index into an empty list.
    num_columns = max(1, num_columns)

    emoji_face = _register_pdf_fonts()
    if emoji_face is None:
        return None

    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
    styles = getSampleStyleSheet()
    spacer_height = 10
    pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers)

    numbered_size = base_font_size + 1 if enlarge_numbered else base_font_size
    item_style = ParagraphStyle(
        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
        fontSize=base_font_size, leading=base_font_size * 1.15, spaceAfter=1
    )
    numbered_bold_style = ParagraphStyle(
        'NumberedBoldStyle', parent=styles['Normal'], fontName=emoji_face,
        fontSize=numbered_size, leading=numbered_size * 1.15, spaceAfter=1
    )
    section_style = ParagraphStyle(
        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
        textColor=colors.darkblue, fontSize=base_font_size * 1.1, leading=base_font_size * 1.32, spaceAfter=2
    )

    # Fill columns left to right; a column is full once it holds its share
    # of the lines, and the last column takes whatever remains.
    columns = [[] for _ in range(num_columns)]
    lines_per_column = total_lines / num_columns
    current_line_count = 0
    current_column = 0
    for item in pdf_content:
        if current_line_count >= lines_per_column and current_column < num_columns - 1:
            current_column += 1
            current_line_count = 0
        columns[current_column].append(item)
        current_line_count += 1

    number_pattern = re.compile(r'^\d+\.\s')
    column_cells = []
    for column in columns:
        cells = []
        for item in column:
            content = _strip_outer_bold(item)
            if content is None:
                cells.append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))
            else:
                # Fully bold lines are numbered entries or section headings.
                style = numbered_bold_style if number_pattern.match(content) else section_style
                cells.append(Paragraph(apply_emoji_font(content, emoji_face), style))
        column_cells.append(cells)

    # Pad shorter columns so every table row has one cell per column; each
    # filler is its own Paragraph rather than one instance shared by many cells.
    max_cells = max(len(cells) for cells in column_cells)
    for cells in column_cells:
        cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

    story = [Spacer(1, spacer_height)]
    if max_cells:
        # Only build the table when there is at least one row; with no
        # content the document is just the spacer.
        col_width = (page_width - 72) / num_columns
        table_data = list(zip(*column_cells))
        table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(table)

    doc.build(story)
    return buffer.getvalue()
181
+
182
def pdf_to_image(pdf_bytes):
    """Rasterize every page of a PDF into a PIL image for preview.

    Args:
        pdf_bytes: The PDF document as bytes. ``None`` or empty input is
            accepted and yields ``None``.

    Returns:
        A list with one RGB ``PIL.Image`` per page, rendered with a 2.0 x 2.0
        scaling matrix, or ``None`` when there is nothing to render or
        rendering fails (the failure is reported through ``st.error``).
    """
    if not pdf_bytes:
        # Nothing to preview; callers treat a falsy result as "no images".
        return None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            images = []
            for page in doc:
                pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                images.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
            return images
        finally:
            # Close the document even if a page fails to render.
            doc.close()
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
195
+
196
# -- Markdown File Selection --
# README.md is excluded; options are listed without the ".md" extension.
md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]

# NOTE(review): this script was recovered from a flattened diff view in which
# indentation was lost. Everything up to the PDF generation is assumed to sit
# inside the sidebar -- confirm against the running app.
with st.sidebar:
    st.markdown("### PDF Options")
    if md_options:
        selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
    else:
        st.warning("No markdown file found. Please add one to your folder.")
        selected_md = None
        # Initialize once only: resetting on every rerun would discard text
        # the user submitted through "Update PDF" when no file backs it.
        if "markdown_content" not in st.session_state:
            st.session_state.markdown_content = ""

    available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
    font_names = list(available_font_files)
    # The selected font only feeds the editor's widget key below; it is not
    # passed to create_pdf.
    selected_font_name = st.selectbox(
        "Select Emoji Font",
        options=font_names,
        index=font_names.index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0,
    )
    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
    render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
    auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
    enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
    num_columns = st.selectbox("Number of Columns", options=[1, 2, 3, 4, 5, 6], index=3)

    # Use the file's content for editing. The key changes with the selected
    # file, font and column count, which gives a fresh editor for each combination.
    edited_markdown = st.text_area(
        "Modify the markdown content below:",
        value=st.session_state.markdown_content,
        height=300,
        key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}",
    )
    if st.button("Update PDF"):
        st.session_state.markdown_content = edited_markdown
        if selected_md:
            # Persist the edit so the next run reads it back from disk.
            with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                f.write(edited_markdown)
        st.rerun()

    # Create a timestamp prefix for file naming
    prefix = get_timestamp_prefix()

    # Download button for Markdown with the timestamped name and a double
    # memo-emoji label (written as escapes so the label survives re-encoding).
    st.download_button(
        label="\U0001F4DD\U0001F4DD Save Markdown",
        data=st.session_state.markdown_content,
        file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
        mime="text/markdown"
    )

    st.markdown("### Text-to-Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("Generate Audio"):
        # Clean markdown input for speech generation
        cleaned_text = clean_for_speech(st.session_state.markdown_content)
        if not cleaned_text.strip():
            st.warning("There is no text to convert to speech.")
        else:
            # Name the audio file after the timestamp, markdown name and voice.
            audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
            audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
            st.audio(audio_file)
            with open(audio_file, "rb") as f:
                audio_bytes = f.read()
            # Double speaker-emoji label.
            st.download_button(
                label="\U0001F50A\U0001F50A Save Audio",
                data=audio_bytes,
                file_name=audio_filename,
                mime="audio/mpeg"
            )

with st.spinner("Generating PDF..."):
    pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns)

with st.container():
    pdf_images = pdf_to_image(pdf_bytes)
    if pdf_images:
        for img in pdf_images:
            st.image(img, use_container_width=True)
    elif pdf_bytes:
        st.info("Download the PDF to view it locally.")

# create_pdf returns None when font registration fails; only offer the
# download when there is a document to hand over.
if pdf_bytes:
    with st.sidebar:
        # Double page-emoji label.
        st.download_button(
            label="\U0001F4C4\U0001F4C4 Save PDF",
            data=pdf_bytes,
            file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
            mime="application/pdf"
        )