awacke1 committed on
Commit
ee2d5e8
·
verified ·
1 Parent(s): 1dc6c54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -52
app.py CHANGED
@@ -80,43 +80,46 @@ async def generate_audio(text, voice, filename):
80
  return filename
81
 
82
  def detect_and_convert_links(text):
 
 
 
 
 
83
  url_pattern = re.compile(
84
- r'(https?://|www\.)[^\s\[\]()<>{}]+(\.[^\s\[\]()<>{}]+)+(/[^\s\[\]()<>{}]*)?',
85
  re.IGNORECASE
86
  )
87
- md_link_pattern = re.compile(r'\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)')
88
- text = md_link_pattern.sub(r'<a href="\2">\1</a>', text)
89
- start_idx = 0
90
- result = []
91
- while start_idx < len(text):
92
- match = url_pattern.search(text, start_idx)
93
- if not match:
94
- result.append(text[start_idx:])
95
- break
96
- prev_text = text[start_idx:match.start()]
97
- tag_balance = prev_text.count('<a') - prev_text.count('</a')
98
- if tag_balance > 0:
99
- result.append(text[start_idx:match.end()])
100
- else:
101
- result.append(text[start_idx:match.start()])
102
- url = match.group(0)
103
- if url.startswith('www.'):
104
- url_with_prefix = 'http://' + url
105
- else:
106
- url_with_prefix = url
107
- result.append(f'<a href="{url_with_prefix}">{url}</a>')
108
- start_idx = match.end()
109
- return ''.join(result)
110
 
111
  def apply_emoji_font(text, emoji_font):
112
- link_pattern = re.compile(r'<a\s+href="([^"]+)">(.*?)</a>')
113
- links = []
114
- def save_link(match):
115
- link_idx = len(links)
116
- links.append((match.group(1), match.group(2)))
117
- return f"###LINK_{link_idx}###"
118
- text = link_pattern.sub(save_link, text)
119
- text = re.sub(r'<b>(.*?)</b>', lambda m: f'###BOLD_START###{m.group(1)}###BOLD_END###', text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  emoji_pattern = re.compile(
121
  r"([\U0001F300-\U0001F5FF"
122
  r"\U0001F600-\U0001F64F"
@@ -131,28 +134,32 @@ def apply_emoji_font(text, emoji_font):
131
  r"\u2600-\u26FF"
132
  r"\u2700-\u27BF]+)"
133
  )
 
134
  def replace_emoji(match):
135
  emoji = match.group(1)
136
  emoji = unicodedata.normalize('NFC', emoji)
137
  return f'<font face="{emoji_font}">{emoji}</font>'
138
- segments = []
139
- last_pos = 0
140
- for match in emoji_pattern.finditer(text):
141
- start, end = match.span()
142
- if last_pos < start:
143
- segments.append(f'<font face="DejaVuSans">{text[last_pos:start]}</font>')
144
- segments.append(replace_emoji(match))
145
- last_pos = end
146
- if last_pos < len(text):
147
- segments.append(f'<font face="DejaVuSans">{text[last_pos:]}</font>')
148
- combined_text = ''.join(segments)
149
- combined_text = combined_text.replace('###BOLD_START###', '</font><b><font face="DejaVuSans">')
150
- combined_text = combined_text.replace('###BOLD_END###', '</font></b><font face="DejaVuSans">')
151
- for i, (url, label) in enumerate(links):
152
- placeholder = f"###LINK_{i}###"
153
- link_html = f'<a href="{url}"><font face="DejaVuSans">{label}</font></a>'
154
- combined_text = combined_text.replace(placeholder, link_html)
155
- return combined_text
 
 
 
156
 
157
  def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered, headings_to_fonts):
158
  lines = markdown_text.strip().split('\n')
@@ -500,9 +507,8 @@ with st.sidebar:
500
  enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
501
  add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=False, key="add_space_before_numbered")
502
 
503
- # Here we use a font that has more emojis
504
  headings_to_fonts = st.checkbox("Headings to Fonts", value=False, key="headings_to_fonts",
505
- help="Convert Markdown headings (# Heading) and emphasis (*word*) to appropriate font styles")
506
 
507
  auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")
508
 
 
80
  return filename
81
 
82
  def detect_and_convert_links(text):
83
+ # Convert Markdown links [text](url) to HTML <a> tags
84
+ md_link_pattern = re.compile(r'\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)')
85
+ text = md_link_pattern.sub(r'<a href="\2">\1</a>', text)
86
+
87
+ # Convert plain URLs to HTML <a> tags, avoiding already tagged links
88
  url_pattern = re.compile(
89
+ r'(?<!href=")(https?://[^\s\[\]()<>{}]+)',
90
  re.IGNORECASE
91
  )
92
+ def replace_url(match):
93
+ url = match.group(1)
94
+ return f'<a href="{url}">{url}</a>'
95
+
96
+ text = url_pattern.sub(replace_url, text)
97
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  def apply_emoji_font(text, emoji_font):
100
+ # Preserve links and bold tags
101
+ link_pattern = re.compile(r'(<a\s+href="[^"]+">.*?</a>)')
102
+ bold_pattern = re.compile(r'(<b>.*?</b>)')
103
+
104
+ # Split text around links and bold tags
105
+ segments = []
106
+ last_pos = 0
107
+ for match in link_pattern.finditer(text):
108
+ start, end = match.span()
109
+ if last_pos < start:
110
+ segments.append(('text', text[last_pos:start]))
111
+ segments.append(('link', match.group(0)))
112
+ last_pos = end
113
+ for match in bold_pattern.finditer(text[last_pos:]):
114
+ start, end = match.span()
115
+ if last_pos < start + last_pos:
116
+ segments.append(('text', text[last_pos:start + last_pos]))
117
+ segments.append(('bold', match.group(0)))
118
+ last_pos = start + end
119
+ if last_pos < len(text):
120
+ segments.append(('text', text[last_pos:]))
121
+
122
+ # Apply emoji font to text segments only
123
  emoji_pattern = re.compile(
124
  r"([\U0001F300-\U0001F5FF"
125
  r"\U0001F600-\U0001F64F"
 
134
  r"\u2600-\u26FF"
135
  r"\u2700-\u27BF]+)"
136
  )
137
+
138
  def replace_emoji(match):
139
  emoji = match.group(1)
140
  emoji = unicodedata.normalize('NFC', emoji)
141
  return f'<font face="{emoji_font}">{emoji}</font>'
142
+
143
+ result = []
144
+ for seg_type, content in segments:
145
+ if seg_type == 'text':
146
+ # Apply font to non-emoji text and emoji separately
147
+ parts = []
148
+ last_pos = 0
149
+ for match in emoji_pattern.finditer(content):
150
+ start, end = match.span()
151
+ if last_pos < start:
152
+ parts.append(f'<font face="DejaVuSans">{content[last_pos:start]}</font>')
153
+ parts.append(replace_emoji(match))
154
+ last_pos = end
155
+ if last_pos < len(content):
156
+ parts.append(f'<font face="DejaVuSans">{content[last_pos:]}</font>')
157
+ result.append(''.join(parts))
158
+ else:
159
+ # Keep links and bold tags unchanged
160
+ result.append(content)
161
+
162
+ return ''.join(result)
163
 
164
  def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered, headings_to_fonts):
165
  lines = markdown_text.strip().split('\n')
 
507
  enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
508
  add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=False, key="add_space_before_numbered")
509
 
 
510
  headings_to_fonts = st.checkbox("Headings to Fonts", value=False, key="headings_to_fonts",
511
+ help="Convert Markdown headings (# Heading) and emphasis (*word*) to appropriate font styles")
512
 
513
  auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")
514