awacke1 committed on
Commit
da62dd8
·
verified ·
1 Parent(s): 0c2cfb0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +199 -1185
app.py CHANGED
@@ -1,1208 +1,175 @@
1
  import io
2
  import re
3
- import streamlit as st
4
- import glob
5
  import os
 
 
 
 
 
6
  from PIL import Image
7
  import fitz
 
8
  from reportlab.lib.pagesizes import A4
9
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
10
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
11
  from reportlab.lib import colors
12
  from reportlab.pdfbase import pdfmetrics
13
  from reportlab.pdfbase.ttfonts import TTFont
14
- import unicodedata
15
- import asyncio
16
- import websockets
17
- import uuid
18
- from datetime import datetime
19
- import random
20
- import time
21
- import hashlib
22
- import base64
23
- import streamlit.components.v1 as components
24
- import edge_tts
25
- from audio_recorder_streamlit import audio_recorder
26
- import nest_asyncio
27
- import pytz
28
- import shutil
29
- import anthropic
30
- import openai
31
- from PyPDF2 import PdfReader
32
- import threading
33
- import json
34
- import zipfile
35
- from gradio_client import Client
36
- from dotenv import load_dotenv
37
- from streamlit_marquee import streamlit_marquee
38
- from collections import defaultdict, Counter
39
- import pandas as pd
40
-
41
- nest_asyncio.apply()
42
 
43
  st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
44
 
45
- icons = '๐Ÿค–๐Ÿง ๐Ÿ”ฌ๐Ÿ“'
46
- Site_Name = '๐Ÿค–๐Ÿง Chat & Quote Node๐Ÿ“๐Ÿ”ฌ'
47
- START_ROOM = "Sector ๐ŸŒŒ"
48
- FUN_USERNAMES = {
49
- "CosmicJester ๐ŸŒŒ": "en-US-AriaNeural",
50
- "PixelPanda ๐Ÿผ": "en-US-JennyNeural",
51
- "QuantumQuack ๐Ÿฆ†": "en-GB-SoniaNeural",
52
- "StellarSquirrel ๐Ÿฟ๏ธ": "en-AU-NatashaNeural",
53
- "GizmoGuru โš™๏ธ": "en-CA-ClaraNeural",
54
- "NebulaNinja ๐ŸŒ ": "en-US-GuyNeural",
55
- "ByteBuster ๐Ÿ’พ": "en-GB-RyanNeural",
56
- "GalacticGopher ๐ŸŒ": "en-AU-WilliamNeural",
57
- "RocketRaccoon ๐Ÿš€": "en-CA-LiamNeural",
58
- "EchoElf ๐Ÿง": "en-US-AnaNeural",
59
- "PhantomFox ๐ŸฆŠ": "en-US-BrandonNeural",
60
- "WittyWizard ๐Ÿง™": "en-GB-ThomasNeural",
61
- "LunarLlama ๐ŸŒ™": "en-AU-FreyaNeural",
62
- "SolarSloth โ˜€๏ธ": "en-CA-LindaNeural",
63
- "AstroAlpaca ๐Ÿฆ™": "en-US-ChristopherNeural",
64
- "CyberCoyote ๐Ÿบ": "en-GB-ElliotNeural",
65
- "MysticMoose ๐ŸฆŒ": "en-AU-JamesNeural",
66
- "GlitchGnome ๐Ÿงš": "en-CA-EthanNeural",
67
- "VortexViper ๐Ÿ": "en-US-AmberNeural",
68
- "ChronoChimp ๐Ÿ’": "en-GB-LibbyNeural"
69
- }
70
- EDGE_TTS_VOICES = list(set(FUN_USERNAMES.values()))
71
- FILE_EMOJIS = {"md": "๐Ÿ“", "mp3": "๐ŸŽต", "png": "๐Ÿ–ผ๏ธ", "mp4": "๐ŸŽฅ", "zip": "๐Ÿ“ฆ"}
72
-
73
- for d in ["chat_logs", "vote_logs", "audio_logs", "history_logs", "audio_cache", "paper_metadata"]:
74
- os.makedirs(d, exist_ok=True)
75
-
76
- CHAT_DIR = "chat_logs"
77
- VOTE_DIR = "vote_logs"
78
- MEDIA_DIR = "."
79
- AUDIO_CACHE_DIR = "audio_cache"
80
- AUDIO_DIR = "audio_logs"
81
- PAPER_DIR = "paper_metadata"
82
- STATE_FILE = "user_state.txt"
83
-
84
- CHAT_FILE = os.path.join(CHAT_DIR, "global_chat.md")
85
- QUOTE_VOTES_FILE = os.path.join(VOTE_DIR, "quote_votes.md")
86
- IMAGE_VOTES_FILE = os.path.join(VOTE_DIR, "image_votes.md")
87
- HISTORY_FILE = os.path.join(VOTE_DIR, "vote_history.md")
88
-
89
- load_dotenv()
90
- anthropic_key = os.getenv('ANTHROPIC_API_KEY', st.secrets.get('ANTHROPIC_API_KEY', ""))
91
- openai_api_key = os.getenv('OPENAI_API_KEY', st.secrets.get('OPENAI_API_KEY', ""))
92
- openai_client = openai.OpenAI(api_key=openai_api_key)
93
-
94
- def format_timestamp_prefix(username=""):
95
- central = pytz.timezone('US/Central')
96
- now = datetime.now(central)
97
- return f"{now.strftime('%Y%m%d_%H%M%S')}-by-{username}"
98
-
99
- class PerformanceTimer:
100
- def __init__(self, name):
101
- self.name, self.start = name, None
102
- def __enter__(self):
103
- self.start = time.time()
104
- return self
105
- def __exit__(self, *args):
106
- duration = time.time() - self.start
107
- st.session_state['operation_timings'][self.name] = duration
108
- st.session_state['performance_metrics'][self.name].append(duration)
109
-
110
- def init_session_state():
111
- defaults = {
112
- 'server_running': False, 'server_task': None, 'active_connections': {},
113
- 'media_notifications': [], 'last_chat_update': 0, 'displayed_chat_lines': [],
114
- 'message_text': "", 'audio_cache': {}, 'pasted_image_data': None,
115
- 'quote_line': None, 'refresh_rate': 10, 'base64_cache': {},
116
- 'transcript_history': [], 'last_transcript': "", 'image_hashes': set(),
117
- 'tts_voice': "en-US-AriaNeural", 'chat_history': [], 'marquee_settings': {
118
- "background": "#1E1E1E", "color": "#FFFFFF", "font-size": "14px",
119
- "animationDuration": "20s", "width": "100%", "lineHeight": "35px"
120
- }, 'operation_timings': {}, 'performance_metrics': defaultdict(list),
121
- 'enable_audio': True, 'download_link_cache': {}, 'username': None,
122
- 'autosend': True, 'autosearch': True, 'last_message': "", 'last_query': "",
123
- 'mp3_files': {}, 'timer_start': time.time(), 'quote_index': 0,
124
- 'quote_source': "famous", 'last_sent_transcript': "", 'old_val': None,
125
- 'last_refresh': time.time(), 'paper_metadata': {}, 'paste_image_base64': "",
126
- 'use_arxiv': True, 'use_arxiv_audio': False, 'speech_processed': False,
127
- 'auto_refresh': True
128
- }
129
- for k, v in defaults.items():
130
- if k not in st.session_state:
131
- st.session_state[k] = v
132
-
133
- def update_marquee_settings_ui():
134
- st.sidebar.markdown("### ๐ŸŽฏ Marquee Settings")
135
- cols = st.sidebar.columns(2)
136
- with cols[0]:
137
- st.session_state['marquee_settings']['background'] = st.color_picker("๐ŸŽจ Background", "#1E1E1E")
138
- st.session_state['marquee_settings']['color'] = st.color_picker("โœ๏ธ Text", "#FFFFFF")
139
- with cols[1]:
140
- st.session_state['marquee_settings']['font-size'] = f"{st.slider('๐Ÿ“ Size', 10, 24, 14)}px"
141
- st.session_state['marquee_settings']['animationDuration'] = f"{st.slider('โฑ๏ธ Speed', 1, 20, 20)}s"
142
-
143
- def display_marquee(text, settings, key_suffix=""):
144
- truncated = text[:280] + "..." if len(text) > 280 else text
145
- streamlit_marquee(content=truncated, **settings, key=f"marquee_{key_suffix}")
146
- st.write("")
147
-
148
- def clean_text_for_tts(text):
149
- return re.sub(r'[#*!\[\]]+', '', ' '.join(text.split()))[:200] or "No text"
150
-
151
- def clean_text_for_filename(text):
152
- return '_'.join(re.sub(r'[^\w\s-]', '', text.lower()).split())[:50]
153
-
154
- def get_high_info_terms(text, top_n=10):
155
- stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'}
156
- words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
157
- bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
158
- filtered = [t for t in words + bi_grams if t not in stop_words and len(t.split()) <= 2]
159
- return [t for t, _ in Counter(filtered).most_common(top_n)]
160
-
161
- def generate_filename(prompt, username, file_type="md", title=None):
162
- timestamp = format_timestamp_prefix(username)
163
- if title:
164
- high_info = '-'.join(get_high_info_terms(title, 5))
165
- return f"{timestamp}-{clean_text_for_filename(prompt[:20])}-{high_info}.{file_type}"
166
- hash_val = hashlib.md5(prompt.encode()).hexdigest()[:8]
167
- return f"{timestamp}-{hash_val}.{file_type}"
168
-
169
- def create_file(prompt, username, file_type="md", title=None):
170
- filename = generate_filename(prompt, username, file_type, title)
171
- with open(filename, 'w', encoding='utf-8') as f:
172
- f.write(prompt)
173
  return filename
174
 
175
- def get_download_link(file, file_type="mp3"):
176
- cache_key = f"dl_{file}"
177
- if cache_key not in st.session_state['download_link_cache']:
178
- with open(file, "rb") as f:
179
- b64 = base64.b64encode(f.read()).decode()
180
- mime_types = {"mp3": "audio/mpeg", "png": "image/png", "mp4": "video/mp4", "md": "text/markdown", "zip": "application/zip"}
181
- st.session_state['download_link_cache'][cache_key] = f'<a href="data:{mime_types.get(file_type, "application/octet-stream")};base64,{b64}" download="{os.path.basename(file)}">{FILE_EMOJIS.get(file_type, "Download")} Download {os.path.basename(file)}</a>'
182
- return st.session_state['download_link_cache'][cache_key]
183
-
184
- def save_username(username):
185
- try:
186
- with open(STATE_FILE, 'w') as f:
187
- f.write(username)
188
- except Exception as e:
189
- print(f"Failed to save username: {e}")
190
-
191
- def load_username():
192
- if os.path.exists(STATE_FILE):
193
- try:
194
- with open(STATE_FILE, 'r') as f:
195
- return f.read().strip()
196
- except Exception as e:
197
- print(f"Failed to load username: {e}")
198
- return None
199
-
200
- def concatenate_markdown_files(exclude_files=["README.md"]):
201
- md_files = sorted([f for f in glob.glob("*.md") if os.path.basename(f) not in exclude_files], key=os.path.getmtime)
202
- all_md_content = ""
203
- for i, md_file in enumerate(md_files, 1):
204
- with open(md_file, 'r', encoding='utf-8') as f:
205
- content = f.read().strip()
206
- all_md_content += f"{i}. {content}\n"
207
- return all_md_content.rstrip()
208
-
209
- def get_chat_text_only(exclude_files=["README.md"]):
210
- md_files = sorted([f for f in glob.glob("*.md") if os.path.basename(f) not in exclude_files], key=os.path.getmtime)
211
- chat_text = ""
212
- for i, md_file in enumerate(md_files, 1):
213
- with open(md_file, 'r', encoding='utf-8') as f:
214
- content = f.read().strip()
215
- lines = content.split('\n')
216
- for line in lines:
217
- if line.strip() and not line.startswith('#'):
218
- match = re.match(r'\[(.*?)\]\s(.*?)\s\((.*?)\):\s*(.*)', line)
219
- if match:
220
- message = match.group(4).strip()
221
- if message.startswith('```markdown'):
222
- message = message.replace('```markdown', '').replace('```', '').strip()
223
- chat_text += f"{message}\n"
224
- return chat_text.rstrip()
225
-
226
- async def async_edge_tts_generate(text, voice, username, rate=0, pitch=0, file_format="mp3"):
227
- cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
228
- if cache_key in st.session_state['audio_cache']:
229
- return st.session_state['audio_cache'][cache_key], 0
230
- start_time = time.time()
231
- text = clean_text_for_tts(text)
232
- if not text or text == "No text":
233
- print(f"Skipping audio generation for empty/invalid text: '{text}'")
234
- return None, 0
235
- filename = f"{format_timestamp_prefix(username)}-{hashlib.md5(text.encode()).hexdigest()[:8]}.{file_format}"
236
- try:
237
- communicate = edge_tts.Communicate(text, voice, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz")
238
- await communicate.save(filename)
239
- if os.path.exists(filename) and os.path.getsize(filename) > 0:
240
- st.session_state['audio_cache'][cache_key] = filename
241
- return filename, time.time() - start_time
242
- else:
243
- print(f"Audio file {filename} was not created or is empty.")
244
- return None, 0
245
- except edge_tts.exceptions.NoAudioReceived as e:
246
- print(f"No audio received for text: '{text}' with voice: {voice}. Error: {e}")
247
- return None, 0
248
- except Exception as e:
249
- print(f"Error generating audio for text: '{text}' with voice: {voice}. Error: {e}")
250
- return None, 0
251
-
252
- def play_and_download_audio(file_path):
253
- if file_path and os.path.exists(file_path):
254
- st.audio(file_path)
255
- st.markdown(get_download_link(file_path), unsafe_allow_html=True)
256
- else:
257
- st.warning(f"Audio file not found: {file_path}")
258
-
259
- def load_mp3_viewer():
260
- mp3_files = sorted(glob.glob("*.mp3"), key=os.path.getmtime)
261
- for i, mp3 in enumerate(mp3_files, 1):
262
- filename = os.path.basename(mp3)
263
- if filename not in st.session_state['mp3_files']:
264
- st.session_state['mp3_files'][filename] = (i, mp3)
265
-
266
- async def save_chat_entry(username, message, voice, is_markdown=False):
267
- if not message.strip() or message == st.session_state.last_transcript:
268
- return None, None
269
- central = pytz.timezone('US/Central')
270
- timestamp = datetime.now(central).strftime("%Y-%m-%d %H:%M:%S")
271
- entry = f"[{timestamp}] {username} ({voice}): {message}" if not is_markdown else f"[{timestamp}] {username} ({voice}):\n```markdown\n{message}\n```"
272
- md_file = create_file(entry, username, "md")
273
- with open(CHAT_FILE, 'a') as f:
274
- f.write(f"{entry}\n")
275
- audio_file, _ Distance = await async_edge_tts_generate(message, voice, username)
276
- if audio_file:
277
- with open(HISTORY_FILE, 'a') as f:
278
- f.write(f"[{timestamp}] {username}: Audio - {audio_file}\n")
279
- st.session_state['mp3_files'][os.path.basename(audio_file)] = (len(st.session_state['chat_history']) + 1, audio_file)
280
- if st.session_state.get('speech_processed', False) and st.session_state.get('message_input', '') == message:
281
- st.session_state['message_input'] = ""
282
- st.session_state['speech_processed'] = False
283
- else:
284
- st.warning(f"Failed to generate audio for: {message}")
285
- await broadcast_message(f"{username}|{message}", "chat")
286
- st.session_state.last_chat_update = time.time()
287
- st.session_state.chat_history.append(entry)
288
- st.session_state.last_transcript = message
289
- return md_file, audio_file
290
-
291
- async def load_chat():
292
- if not os.path.exists(CHAT_FILE):
293
- with open(CHAT_FILE, 'a') as f:
294
- f.write(f"# {START_ROOM} Chat\n\nWelcome to the cosmic hub! ๐ŸŽค\n")
295
- with open(CHAT_FILE, 'r') as f:
296
- content = f.read().strip()
297
- lines = content.split('\n')
298
- unique_lines = list(dict.fromkeys(line for line in lines if line.strip()))
299
- return unique_lines
300
-
301
- async def perform_claude_search(query, username, image=None):
302
- if not query.strip() or query == st.session_state.last_transcript:
303
- return None, None, None
304
- client = anthropic.Anthropic(api_key=anthropic_key)
305
- message_content = [{"type": "text", "text": query}]
306
- if image:
307
- buffered = io.BytesIO()
308
- image.save(buffered, format="PNG")
309
- img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
310
- message_content.append({
311
- "type": "image",
312
- "source": {
313
- "type": "base64",
314
- "media_type": "image/png",
315
- "data": img_base64
316
- }
317
- })
318
- try:
319
- response = client.messages.create(
320
- model="claude-3-sonnet-20240229",
321
- max_tokens=1000,
322
- messages=[{"role": "user", "content": message_content}]
323
- )
324
- result = response.content[0].text
325
- st.markdown(f"### Claude's Reply ๐Ÿง \n{result}")
326
- except Exception as e:
327
- st.error(f"Claude processing failed: {e}")
328
- return None, None, None
329
-
330
- voice = FUN_USERNAMES.get(username, "en-US-AriaNeural")
331
- full_text = f"Prompt: {query}\nResponse: {result}"
332
- md_file, audio_file = await save_chat_entry(username, full_text, voice, True)
333
- return md_file, audio_file, result
334
-
335
- async def perform_arxiv_search(query, username, claude_result=None):
336
- if not query.strip() or query == st.session_state.last_transcript:
337
- return None, None
338
- if claude_result is None:
339
- client = anthropic.Anthropic(api_key=anthropic_key)
340
- claude_response = client.messages.create(
341
- model="claude-3-sonnet-20240229",
342
- max_tokens=1000,
343
- messages=[{"role": "user", "content": query}]
344
- )
345
- claude_result = claude_response.content[0].text
346
- st.markdown(f"### Claude's Reply ๐Ÿง \n{claude_result}")
347
-
348
- enhanced_query = f"{query}\n\n{claude_result}"
349
- gradio_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
350
- refs = gradio_client.predict(
351
- enhanced_query, 10, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md"
352
- )[0]
353
- result = f"๐Ÿ”Ž {enhanced_query}\n\n{refs}"
354
- voice = FUN_USERNAMES.get(username, "en-US-AriaNeural")
355
- md_file, audio_file = await save_chat_entry(username, result, voice, True)
356
- return md_file, audio_file
357
-
358
- async def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
359
- start = time.time()
360
- client = anthropic.Anthropic(api_key=anthropic_key)
361
- response = client.messages.create(
362
- model="claude-3-sonnet-20240229",
363
- max_tokens=1000,
364
- messages=[{"role": "user", "content": q}]
365
  )
366
- st.write("Claude's reply ๐Ÿง :")
367
- st.markdown(response.content[0].text)
368
-
369
- result = response.content[0].text
370
- md_file = create_file(result, "System", "md")
371
- audio_file, _ = await async_edge_tts_generate(result, st.session_state['tts_voice'], "System")
372
- st.subheader("๐Ÿ“ Main Response Audio")
373
- play_and_download_audio(audio_file)
374
-
375
- papers = []
376
- if useArxiv:
377
- q = q + result
378
- st.write('Running Arxiv RAG with Claude inputs.')
379
- gradio_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
380
- refs = gradio_client.predict(
381
- q, 20, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md"
382
- )[0]
383
- papers = parse_arxiv_refs(refs, q)
384
- for paper in papers:
385
- filename = create_file(generate_5min_feature_markdown(paper), "System", "md", paper['title'])
386
- paper['md_file'] = filename
387
- st.session_state['paper_metadata'][paper['title']] = filename
388
- if papers and useArxivAudio:
389
- await create_paper_audio_files(papers, q)
390
- elapsed = time.time() - start
391
- st.write(f"**Total Elapsed:** {elapsed:.2f} s")
392
- return result, papers
393
-
394
- async def websocket_handler(websocket, path):
395
- client_id = str(uuid.uuid4())
396
- room_id = "chat"
397
- if room_id not in st.session_state.active_connections:
398
- st.session_state.active_connections[room_id] = {}
399
- st.session_state.active_connections[room_id][client_id] = websocket
400
- username = st.session_state.get('username', random.choice(list(FUN_USERNAMES.keys())))
401
- chat_content = await load_chat()
402
- if not any(f"Client-{client_id}" in line for line in chat_content):
403
- await save_chat_entry("System ๐ŸŒŸ", f"{username} has joined {START_ROOM}!", "en-US-AriaNeural")
404
- try:
405
- async for message in websocket:
406
- if '|' in message:
407
- username, content = message.split('|', 1)
408
- voice = FUN_USERNAMES.get(username, "en-US-AriaNeural")
409
- await save_chat_entry(username, content, voice)
410
- else:
411
- await websocket.send("ERROR|Message format: username|content")
412
- except websockets.ConnectionClosed:
413
- await save_chat_entry("System ๐ŸŒŸ", f"{username} has left {START_ROOM}!", "en-US-AriaNeural")
414
- finally:
415
- if room_id in st.session_state.active_connections and client_id in st.session_state.active_connections[room_id]:
416
- del st.session_state.active_connections[room_id][client_id]
417
-
418
- async def broadcast_message(message, room_id):
419
- if room_id in st.session_state.active_connections:
420
- disconnected = []
421
- for client_id, ws in st.session_state.active_connections[room_id].items():
422
- try:
423
- await ws.send(message)
424
- except websockets.ConnectionClosed:
425
- disconnected.append(client_id)
426
- for client_id in disconnected:
427
- if client_id in st.session_state.active_connections[room_id]:
428
- del st.session_state.active_connections[room_id][client_id]
429
-
430
- async def run_websocket_server():
431
- if not st.session_state.get('server_running', False):
432
- server = await websockets.serve(websocket_handler, '0.0.0.0', 8765)
433
- st.session_state['server_running'] = True
434
- await server.wait_closed()
435
-
436
- def start_websocket_server():
437
- loop = asyncio.new_event_loop()
438
- asyncio.set_event_loop(loop)
439
- loop.run_until_complete(run_websocket_server())
440
-
441
- class AudioProcessor:
442
- def __init__(self):
443
- self.cache_dir = AUDIO_CACHE_DIR
444
- os.makedirs(self.cache_dir, exist_ok=True)
445
- self.metadata = json.load(open(f"{self.cache_dir}/metadata.json")) if os.path.exists(f"{self.cache_dir}/metadata.json") else {}
446
-
447
- def _save_metadata(self):
448
- with open(f"{self.cache_dir}/metadata.json", 'w') as f:
449
- json.dump(self.metadata, f)
450
-
451
- async def create_audio(self, text, voice='en-US-AriaNeural'):
452
- cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
453
- cache_path = f"{self.cache_dir}/{cache_key}.mp3"
454
- if cache_key in self.metadata and os.path.exists(cache_path):
455
- return cache_path
456
- text = clean_text_for_tts(text)
457
- if not text:
458
- return None
459
- communicate = edge_tts.Communicate(text, voice)
460
- await communicate.save(cache_path)
461
- self.metadata[cache_key] = {'timestamp': datetime.now().isoformat(), 'text_length': len(text), 'voice': voice}
462
- self._save_metadata()
463
- return cache_path
464
-
465
- def process_pdf(pdf_file, max_pages, voice, audio_processor):
466
- reader = PdfReader(pdf_file)
467
- total_pages = min(len(reader.pages), max_pages)
468
- texts, audios = [], {}
469
- async def process_page(i, text):
470
- audio_path = await audio_processor.create_audio(text, voice)
471
- if audio_path:
472
- audios[i] = audio_path
473
- for i in range(total_pages):
474
- text = reader.pages[i].extract_text()
475
- texts.append(text)
476
- threading.Thread(target=lambda: asyncio.run(process_page(i, text))).start()
477
- return texts, audios, total_pages
478
-
479
- def parse_arxiv_refs(ref_text, query):
480
- if not ref_text:
481
- return []
482
- papers = []
483
- current = {}
484
- for line in ref_text.split('\n'):
485
- if line.count('|') == 2:
486
- if current:
487
- papers.append(current)
488
- date, title, *_ = line.strip('* ').split('|')
489
- url = re.search(r'(https://arxiv.org/\S+)', line).group(1) if re.search(r'(https://arxiv.org/\S+)', line) else f"paper_{len(papers)}"
490
- current = {'date': date, 'title': title, 'url': url, 'authors': '', 'summary': '', 'full_audio': None, 'download_base64': '', 'query': query}
491
- elif current:
492
- if not current['authors']:
493
- current['authors'] = line.strip('* ')
494
- else:
495
- current['summary'] += ' ' + line.strip() if current['summary'] else line.strip()
496
- if current:
497
- papers.append(current)
498
- return papers[:20]
499
-
500
- def generate_5min_feature_markdown(paper):
501
- title, summary, authors, date, url = paper['title'], paper['summary'], paper['authors'], paper['date'], paper['url']
502
- pdf_url = url.replace("abs", "pdf") + (".pdf" if not url.endswith(".pdf") else "")
503
- wct, sw = len(title.split()), len(summary.split())
504
- terms = get_high_info_terms(summary, 15)
505
- rouge = round((len(terms) / max(sw, 1)) * 100, 2)
506
- mermaid = "```mermaid\nflowchart TD\n" + "\n".join(f' T{i+1}["{terms[i]}"] --> T{i+2}["{terms[i+1]}"]' for i in range(len(terms)-1)) + "\n```"
507
- return f"""
508
- ## ๐Ÿ“„ {title}
509
- **Authors:** {authors}
510
- **Date:** {date}
511
- **Words:** Title: {wct}, Summary: {sw}
512
- **Links:** [Abstract]({url}) | [PDF]({pdf_url})
513
- **Terms:** {', '.join(terms)}
514
- **ROUGE:** {rouge}%
515
- ### ๐ŸŽค TTF Read Aloud
516
- - **Title:** {title}
517
- - **Terms:** {', '.join(terms)}
518
- - **ROUGE:** {rouge}%
519
- #### Concepts Graph
520
- {mermaid}
521
- ---
522
- """
523
-
524
- async def create_paper_audio_files(papers, query):
525
- for p in papers:
526
- audio_text = clean_text_for_tts(f"{p['title']} by {p['authors']}. {p['summary']}")
527
- p['full_audio'], _ = await async_edge_tts_generate(audio_text, st.session_state['tts_voice'], p['authors'])
528
- if p['full_audio']:
529
- p['download_base64'] = get_download_link(p['full_audio'])
530
-
531
- def save_vote(file, item, user_hash):
532
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
533
- entry = f"[{timestamp}] {user_hash} voted for {item}"
534
- try:
535
- with open(file, 'a') as f:
536
- f.write(f"{entry}\n")
537
- with open(HISTORY_FILE, 'a') as f:
538
- f.write(f"- {timestamp} - User {user_hash} voted for {item}\n")
539
- return True
540
- except Exception as e:
541
- print(f"Vote save flop: {e}")
542
- return False
543
-
544
- def load_votes(file):
545
- if not os.path.exists(file):
546
- with open(file, 'w') as f:
547
- f.write("# Vote Tally\n\nNo votes yet - get clicking! ๐Ÿ–ฑ๏ธ\n")
548
- try:
549
- with open(file, 'r') as f:
550
- lines = f.read().strip().split('\n')
551
- votes = {}
552
- for line in lines[2:]:
553
- if line.strip() and 'voted for' in line:
554
- item = line.split('voted for ')[1]
555
- votes[item] = votes.get(item, 0) + 1
556
- return votes
557
- except Exception as e:
558
- print(f"Vote load oopsie: {e}")
559
- return {}
560
-
561
- def generate_user_hash():
562
- if 'user_hash' not in st.session_state:
563
- session_id = str(random.getrandbits(128))
564
- hash_object = hashlib.md5(session_id.encode())
565
- st.session_state['user_hash'] = hash_object.hexdigest()[:8]
566
- return st.session_state['user_hash']
567
-
568
- async def save_pasted_image(image, username, prompt=""):
569
- img_hash = hashlib.md5(image.tobytes()).hexdigest()[:8]
570
- if img_hash in st.session_state.image_hashes:
571
- return None
572
- context = prompt if prompt else st.session_state.get('last_message', "pasted_image")
573
- timestamp = format_timestamp_prefix(username)
574
- filename = f"{timestamp}-{clean_text_for_filename(context)}-{img_hash}.png"
575
- filepath = filename
576
- try:
577
- image.save(filepath, "PNG")
578
- st.session_state.image_hashes.add(img_hash)
579
- await save_chat_entry(username, f"Pasted image saved: {filepath}", FUN_USERNAMES.get(username, "en-US-AriaNeural"))
580
- return filepath
581
- except Exception as e:
582
- st.error(f"Failed to save image: {e}")
583
- return None
584
-
585
- def create_zip_of_files(files, prefix="All", query="latest"):
586
- if not files:
587
- return None
588
- terms = get_high_info_terms(" ".join([open(f, 'r', encoding='utf-8').read() if f.endswith('.md') else os.path.splitext(os.path.basename(f))[0].replace('_', ' ') for f in files] + [query]), 5)
589
- zip_name = f"{prefix}_{format_timestamp_prefix()}_{'-'.join(terms)[:20]}.zip"
590
- with zipfile.ZipFile(zip_name, 'w') as z:
591
- [z.write(f) for f in files]
592
- return zip_name
593
-
594
- def delete_files(file_type, exclude_files=["README.md"]):
595
- files = glob.glob(f"*.{file_type}")
596
- if file_type == "md":
597
- files = [f for f in files if os.path.basename(f) not in exclude_files]
598
- for f in files:
599
- try:
600
- os.remove(f)
601
- st.session_state['mp3_files'] = {k: v for k, v in st.session_state['mp3_files'].items() if not k.endswith(f".{file_type}")}
602
- except Exception as e:
603
- st.error(f"Failed to delete {f}: {e}")
604
- if file_type in ["md", "mp3", "png", "mp4"]:
605
- st.session_state['download_link_cache'] = {}
606
-
607
- def paste_image_component():
608
- with st.form(key="paste_form"):
609
- paste_input = st.text_area("Paste Image Data Here", key="paste_input", height=100)
610
- submit_button = st.form_submit_button("Paste Image ๐Ÿ“‹")
611
-
612
- if submit_button and paste_input:
613
- try:
614
- if paste_input.startswith('data:image'):
615
- mime_type = paste_input.split(';')[0].split(':')[1]
616
- base64_str = paste_input.split(',')[1]
617
- img_bytes = base64.b64decode(base64_str)
618
- img = Image.open(io.BytesIO(img_bytes))
619
- st.image(img, caption=f"Pasted Image ({mime_type.split('/')[1].upper()})", use_column_width=True)
620
- return img, mime_type.split('/')[1]
621
- else:
622
- st.warning("Pasted data is not a recognized image format.")
623
- return None, None
624
- except Exception as e:
625
- st.error(f"Error decoding pasted image: {e}")
626
- return None, None
627
- return None, None
628
-
629
- def create_pdf_tab(default_markdown):
630
- font_files = glob.glob("*.ttf")
631
- if not font_files:
632
- st.error("No .ttf font files found in the current directory. Please add some, e.g., NotoEmoji-Bold.ttf and DejaVuSans.ttf.")
633
- return
634
- available_fonts = {os.path.splitext(os.path.basename(f))[0]: f for f in font_files}
635
-
636
- md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
637
- md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]
638
-
639
- with st.sidebar:
640
- selected_md = st.selectbox("Select Markdown File", options=md_options, index=0 if md_options else -1)
641
- selected_font_name = st.selectbox("Select Emoji Font", options=list(available_fonts.keys()), index=0 if "NotoEmoji-Bold" in available_fonts else 0)
642
- selected_font_path = available_fonts[selected_font_name]
643
- base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
644
- plain_text_mode = st.checkbox("Render as Plain Text (Preserve Bold Only)", value=False)
645
- auto_bold_numbers = st.checkbox("Auto-Bold Numbered Lines", value=False)
646
- enlarge_font_size = st.checkbox("Enlarge Font Size for Numbered Lines", value=True)
647
- num_columns = st.selectbox("Number of Columns", options=[1, 2, 3, 4, 5, 6], index=3)
648
-
649
- if 'markdown_content' not in st.session_state or not md_options:
650
- st.session_state.markdown_content = default_markdown
651
-
652
- if md_options and selected_md:
653
- with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
654
- st.session_state.markdown_content = f.read()
655
-
656
- edited_markdown = st.text_area("Modify the markdown content below:", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")
657
- if st.button("Update PDF"):
658
- st.session_state.markdown_content = edited_markdown
659
- with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
660
- f.write(edited_markdown)
661
- st.rerun()
662
-
663
- st.download_button(label="Save Markdown", data=st.session_state.markdown_content, file_name=f"{selected_md}.md", mime="text/markdown")
664
-
665
- st.subheader("Voice Settings")
666
- voice = st.selectbox("Select Voice", options=EDGE_TTS_VOICES, index=0)
667
- if st.button("Generate MP3"):
668
- audio_file, _ = asyncio.run(async_edge_tts_generate(edited_markdown, voice, "System"))
669
- if audio_file:
670
- st.audio(audio_file)
671
- st.markdown(get_download_link(audio_file), unsafe_allow_html=True)
672
-
673
- if not md_options:
674
- st.warning("No .md files found in the directory (excluding README.md). Using default content.")
675
- return
676
-
677
  try:
678
- pdfmetrics.registerFont(TTFont(selected_font_name, selected_font_path))
 
 
 
 
 
 
 
 
 
 
679
  pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
680
  except Exception as e:
681
- st.error(f"Failed to register fonts: {e}. Ensure both {selected_font_name}.ttf and DejaVuSans.ttf are in the directory.")
682
  return
683
-
684
- def apply_emoji_font(text, emoji_font):
685
- emoji_pattern = re.compile(
686
- r"([\U0001F300-\U0001F5FF"
687
- r"\U0001F600-\U0001F64F"
688
- r"\U0001F680-\U0001F6FF"
689
- r"\U0001F700-\U0001F77F"
690
- r"\U0001F780-\U0001F7FF"
691
- r"\U0001F800-\U0001F8FF"
692
- r"\U0001F900-\U0001F9FF"
693
- r"\U0001FA00-\U0001FA6F"
694
- r"\U0001FA70-\U0001FAFF"
695
- r"\u2600-\u26FF"
696
- r"\u2700-\u27BF]+)"
697
- )
698
-
699
- def replace_emoji(match):
700
- emoji = match.group(1)
701
- emoji = unicodedata.normalize('NFC', emoji)
702
- return f'<font face="{emoji_font}">{emoji}</font>'
703
-
704
- segments = []
705
- last_pos = 0
706
- for match in emoji_pattern.finditer(text):
707
- start, end = match.span()
708
- if last_pos < start:
709
- segments.append(f'<font face="DejaVuSans">{text[last_pos:start]}</font>')
710
- segments.append(replace_emoji(match))
711
- last_pos = end
712
- if last_pos < len(text):
713
- segments.append(f'<font face="DejaVuSans">{text[last_pos:]}</font>')
714
- return ''.join(segments)
715
-
716
- def markdown_to_pdf_content(markdown_text, plain_text_mode, auto_bold_numbers):
717
- lines = markdown_text.strip().split('\n')
718
- pdf_content = []
719
- number_pattern = re.compile(r'^\d+\.\s')
720
-
721
- if plain_text_mode:
722
- for line in lines:
723
- line = line.strip()
724
- if not line or line.startswith('# '):
725
- continue
726
- bold_pattern = re.compile(r'\*\*(.*?)\*\*')
727
- line = bold_pattern.sub(r'<b>\1</b>', line)
728
- line = re.sub(r'\*\*', '', line)
729
- pdf_content.append(line)
730
- else:
731
- for line in lines:
732
- line = line.strip()
733
- if not line or line.startswith('# '):
734
- continue
735
- bold_pattern = re.compile(r'\*\*(.*?)\*\*')
736
- if bold_pattern.search(line):
737
- line = bold_pattern.sub(r'<b>\1</b>', line)
738
- line = re.sub(r'\*\*', '', line)
739
- if line.startswith('## ') or line.startswith('### '):
740
- text = line.replace('## ', '').replace('### ', '').strip()
741
- pdf_content.append(f"<b>{text}</b>")
742
- elif auto_bold_numbers and number_pattern.match(line):
743
- pdf_content.append(f"<b>{line}</b>")
744
- else:
745
- pdf_content.append(line.strip())
746
-
747
- total_lines = len(pdf_content)
748
- return pdf_content, total_lines
749
-
750
- def create_pdf(markdown_text, base_font_size, plain_text_mode, num_columns, auto_bold_numbers, enlarge_font_size):
751
- buffer = io.BytesIO()
752
- page_width = A4[0] * 2
753
- page_height = A4[1]
754
- doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
755
- styles = getSampleStyleSheet()
756
- story = []
757
- spacer_height = 10
758
- section_spacer_height = 15
759
- pdf_content, total_lines = markdown_to_pdf_content(markdown_text, plain_text_mode, auto_bold_numbers)
760
-
761
- item_font_size = base_font_size
762
- section_font_size = base_font_size * 1.1
763
- numbered_font_size = base_font_size + 1 if enlarge_font_size else base_font_size
764
-
765
- section_style = ParagraphStyle(
766
- 'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
767
- textColor=colors.darkblue, fontSize=section_font_size, leading=section_font_size * 1.2, spaceAfter=2
768
- )
769
- item_style = ParagraphStyle(
770
- 'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
771
- fontSize=item_font_size, leading=item_font_size * 1.15, spaceAfter=1
772
- )
773
- numbered_style = ParagraphStyle(
774
- 'NumberedStyle', parent=styles['Normal'], fontName="DejaVuSans",
775
- fontSize=numbered_font_size, leading=numbered_font_size * 1.15, spaceAfter=1
776
- )
777
-
778
- story.append(Spacer(1, spacer_height))
779
- columns = [[] for _ in range(num_columns)]
780
- lines_per_column = total_lines / num_columns if num_columns > 0 else total_lines
781
- current_line_count = 0
782
- current_column = 0
783
-
784
- number_pattern = re.compile(r'^\d+\.\s')
785
- for i, item in enumerate(pdf_content):
786
- if i > 0 and number_pattern.match(item.replace('<b>', '').replace('</b>', '')):
787
- columns[current_column].append(Spacer(1, section_spacer_height))
788
-
789
- if current_line_count >= lines_per_column and current_column < num_columns - 1:
790
- current_column += 1
791
- current_line_count = 0
792
- columns[current_column].append(item)
793
- current_line_count += 1
794
-
795
- column_cells = [[] for _ in range(num_columns)]
796
- for col_idx, column in enumerate(columns):
797
- for item in column:
798
- if isinstance(item, Spacer):
799
- column_cells[col_idx].append(item)
800
- elif isinstance(item, str) and item.startswith('<b>'):
801
- text = item.replace('<b>', '').replace('</b>', '')
802
- column_cells[col_idx].append(Paragraph(apply_emoji_font(text, selected_font_name), section_style))
803
- elif number_pattern.match(item):
804
- column_cells[col_idx].append(Paragraph(apply_emoji_font(item, selected_font_name), numbered_style))
805
  else:
806
- column_cells[col_idx].append(Paragraph(apply_emoji_font(item, selected_font_name), item_style))
807
-
808
- max_cells = max(len(cells) for cells in column_cells) if column_cells else 0
809
- for cells in column_cells:
810
- cells.extend([Paragraph("", item_style)] * (max_cells - len(cells)))
811
-
812
- col_width = (page_width - 72) / num_columns if num_columns > 0 else page_width - 72
813
- table_data = list(zip(*column_cells)) if column_cells else [[]]
814
- table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
815
- table.setStyle(TableStyle([
816
- ('VALIGN', (0, 0), (-1, -1), 'TOP'), ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
817
- ('BACKGROUND', (0, 0), (-1, -1), colors.white), ('GRID', (0, 0), (-1, -1), 0, colors.white),
818
- ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
819
- ('LEFTPADDING', (0, 0), (-1, -1), 2), ('RIGHTPADDING', (0, 0), (-1, -1), 2),
820
- ('TOPPADDING', (0, 0), (-1, -1), 1), ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
821
- ]))
822
-
823
- story.append(table)
824
- doc.build(story)
825
- buffer.seek(0)
826
- return buffer.getvalue()
827
-
828
- def pdf_to_image(pdf_bytes):
829
- try:
830
- doc = fitz.open(stream=pdf_bytes, filetype="pdf")
831
- images = []
832
- for page in doc:
833
- pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
834
- img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
835
- images.append(img)
836
- doc.close()
837
- return images
838
- except Exception as e:
839
- st.error(f"Failed to render PDF preview: {e}")
840
- return None
841
-
842
- with st.spinner("Generating PDF..."):
843
- pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, plain_text_mode, num_columns, auto_bold_numbers, enlarge_font_size)
844
-
845
- with st.container():
846
- pdf_images = pdf_to_image(pdf_bytes)
847
- if pdf_images:
848
- for img in pdf_images:
849
- st.image(img, use_container_width=True)
850
- else:
851
- st.info("Download the PDF to view it locally.")
852
-
853
- with st.sidebar:
854
- st.download_button(label="Download PDF", data=pdf_bytes, file_name="deities_guide.pdf", mime="application/pdf")
855
-
856
- def main():
857
- init_session_state()
858
- load_mp3_viewer()
859
- saved_username = load_username()
860
- if saved_username and saved_username in FUN_USERNAMES:
861
- st.session_state.username = saved_username
862
- if not st.session_state.username:
863
- available = [n for n in FUN_USERNAMES if not any(f"{n} has joined" in l for l in asyncio.run(load_chat()))]
864
- st.session_state.username = random.choice(available or list(FUN_USERNAMES.keys()))
865
- st.session_state.tts_voice = FUN_USERNAMES[st.session_state.username]
866
- asyncio.run(save_chat_entry("System ๐ŸŒŸ", f"{st.session_state.username} has joined {START_ROOM}!", "en-US-AriaNeural"))
867
- save_username(st.session_state.username)
868
-
869
- st.title(f"{Site_Name} for {st.session_state.username}")
870
- update_marquee_settings_ui()
871
- chat_text = get_chat_text_only()
872
- display_marquee(f"๐Ÿš€ Welcome to {START_ROOM} | ๐Ÿค– {st.session_state.username} | Chat: {chat_text}", st.session_state['marquee_settings'], "welcome")
873
-
874
- mycomponent = components.declare_component("mycomponent", path="mycomponent")
875
- val = mycomponent(my_input_value="", key=f"speech_{st.session_state.get('speech_processed', False)}")
876
- if val and val != st.session_state.last_transcript:
877
- val_stripped = val.strip().replace('\n', ' ')
878
- if val_stripped:
879
- voice = FUN_USERNAMES.get(st.session_state.username, "en-US-AriaNeural")
880
- st.session_state['speech_processed'] = True
881
- md_file, audio_file = asyncio.run(save_chat_entry(st.session_state.username, val_stripped, voice))
882
- if audio_file:
883
- play_and_download_audio(audio_file)
884
- st.rerun()
885
-
886
- tab_main = st.radio("Action:", ["๐ŸŽค Chat & Voice", "๐Ÿ” ArXiv", "๐Ÿ“š PDF to Audio", "๐Ÿ“œ PDF Output"], horizontal=True, key="tab_main")
887
- st.checkbox("Search ArXiv", key="use_arxiv")
888
- st.checkbox("ArXiv Audio", key="use_arxiv_audio")
889
- st.checkbox("Autosend Chat", key="autosend")
890
- st.checkbox("Autosearch ArXiv", key="autosearch")
891
-
892
- if tab_main == "๐ŸŽค Chat & Voice":
893
- st.subheader(f"{START_ROOM} Chat ๐Ÿ’ฌ")
894
- chat_content = asyncio.run(load_chat())
895
- chat_container = st.container()
896
- with chat_container:
897
- numbered_content = "\n".join(f"{i+1}. {line}" for i, line in enumerate(chat_content))
898
- st.code(numbered_content, language="python")
899
-
900
- message = st.text_input(f"Message as {st.session_state.username}", key="message_input")
901
-
902
- col_paste, col_upload = st.columns(2)
903
- with col_paste:
904
- pasted_image, img_type = paste_image_component()
905
- with col_upload:
906
- uploaded_files = st.file_uploader("Upload Files", accept_multiple_files=True, type=["mp3", "png", "mp4", "md"], key="file_upload")
907
-
908
- if pasted_image is not None:
909
- if st.session_state['paste_image_base64'] != base64.b64encode(pasted_image.tobytes()).decode('utf-8'):
910
- st.session_state['paste_image_base64'] = base64.b64encode(pasted_image.tobytes()).decode('utf-8')
911
- voice = FUN_USERNAMES.get(st.session_state.username, "en-USA-AriaNeural")
912
- image_prompt = st.text_input("Add a prompt for Claude (e.g., 'OCR this image')", key="image_prompt", value="")
913
- with st.spinner("Saving image..."):
914
- filename = asyncio.run(save_pasted_image(pasted_image, st.session_state.username, image_prompt))
915
- if filename:
916
- st.success(f"Image saved as: {filename}")
917
- if image_prompt:
918
- with st.spinner("Processing with Claude..."):
919
- md_file_claude, audio_file_claude, claude_result = asyncio.run(
920
- perform_claude_search(image_prompt, st.session_state.username, pasted_image)
921
- )
922
- if audio_file_claude:
923
- play_and_download_audio(audio_file_claude)
924
- if claude_result:
925
- with st.spinner("Searching ArXiv..."):
926
- md_file_arxiv, audio_file_arxiv = asyncio.run(
927
- perform_arxiv_search(image_prompt, st.session_state.username, claude_result)
928
- )
929
- if audio_file_arxiv:
930
- play_and_download_audio(audio_file_arxiv)
931
- st.session_state.pasted_image_data = None
932
- st.session_state['paste_image_base64'] = ""
933
- st.session_state.timer_start = time.time()
934
- save_username(st.session_state.username)
935
- st.rerun()
936
-
937
- if uploaded_files:
938
- for uploaded_file in uploaded_files:
939
- file_type = uploaded_file.name.split('.')[-1].lower()
940
- if file_type in ["mp3", "png", "mp4", "md"]:
941
- timestamp = format_timestamp_prefix(st.session_state.username)
942
- filename = f"{timestamp}-{clean_text_for_filename(uploaded_file.name)}"
943
- with open(filename, "wb") as f:
944
- f.write(uploaded_file.getbuffer())
945
- st.success(f"Uploaded {file_type.upper()} as: {filename}")
946
- if file_type == "png":
947
- img = Image.open(filename)
948
- st.image(img, caption=f"Uploaded Image: {filename}", use_column_width=True)
949
- elif file_type == "mp3":
950
- st.audio(filename)
951
- elif file_type == "mp4":
952
- st.video(filename)
953
- elif file_type == "md":
954
- with open(filename, 'r', encoding='utf-8') as f:
955
- st.markdown(f.read())
956
- asyncio.run(save_chat_entry(st.session_state.username, f"Uploaded {file_type.upper()}: {filename}", FUN_USERNAMES.get(st.session_state.username, "en-US-AriaNeural")))
957
- st.session_state.timer_start = time.time()
958
- save_username(st.session_state.username)
959
- st.rerun()
960
-
961
- if (message and message != st.session_state.last_message) or (st.session_state.pasted_image_data and not st.session_state['paste_image_base64']):
962
- st.session_state.last_message = message
963
- col_send, col_claude, col_arxiv = st.columns([1, 1, 1])
964
-
965
- with col_send:
966
- if st.session_state.autosend or st.button("Send ๐Ÿš€", key="send_button"):
967
- voice = FUN_USERNAMES.get(st.session_state.username, "en-US-AriaNeural")
968
- if message.strip():
969
- md_file, audio_file = asyncio.run(save_chat_entry(st.session_state.username, message, voice, True))
970
- if audio_file:
971
- play_and_download_audio(audio_file)
972
- if st.session_state.pasted_image_data:
973
- asyncio.run(save_chat_entry(st.session_state.username, f"Pasted image: {st.session_state.pasted_image_data}", voice))
974
- st.session_state.pasted_image_data = None
975
- st.session_state.timer_start = time.time()
976
- save_username(st.session_state.username)
977
- st.rerun()
978
-
979
- with col_claude:
980
- if st.button("๐Ÿง  Claude", key="claude_button"):
981
- voice = FUN_USERNAMES.get(st.session_state.username, "en-US-AriaNeural")
982
- if message.strip():
983
- md_file, audio_file, _ = asyncio.run(perform_claude_search(message, st.session_state.username))
984
- if audio_file:
985
- play_and_download_audio(audio_file)
986
- st.session_state.timer_start = time.time()
987
- save_username(st.session_state.username)
988
- st.rerun()
989
-
990
- with col_arxiv:
991
- if st.button("๐Ÿ” ArXiv", key="arxiv_button"):
992
- voice = FUN_USERNAMES.get(st.session_state.username, "en-US-AriaNeural")
993
- if message.strip():
994
- md_file, audio_file = asyncio.run(perform_arxiv_search(message, st.session_state.username))
995
- if audio_file:
996
- play_and_download_audio(audio_file)
997
- st.session_state.timer_start = time.time()
998
- save_username(st.session_state.username)
999
- st.rerun()
1000
-
1001
- elif tab_main == "๐Ÿ” ArXiv":
1002
- st.subheader("๐Ÿ” Query ArXiv")
1003
- q = st.text_input("๐Ÿ” Query:", key="arxiv_query")
1004
- if q and q != st.session_state.last_query:
1005
- st.session_state.last_query = q
1006
- if st.session_state.autosearch or st.button("๐Ÿ” Run", key="arxiv_run"):
1007
- result, papers = asyncio.run(perform_ai_lookup(q, useArxiv=st.session_state['use_arxiv'], useArxivAudio=st.session_state['use_arxiv_audio']))
1008
- st.markdown(f"### Query: {q}")
1009
- for i, p in enumerate(papers, 1):
1010
- expander_label = f"{p['title']} | [arXiv Link]({p['url']})"
1011
- with st.expander(expander_label):
1012
- with open(p['md_file'], 'r', encoding='utf-8') as f:
1013
- content = f.read()
1014
- numbered_content = "\n".join(f"{j+1}. {line}" for j, line in enumerate(content.split('\n')))
1015
- st.code(numbered_content, language="python")
1016
-
1017
- elif tab_main == "๐Ÿ“š PDF to Audio":
1018
- audio_processor = AudioProcessor()
1019
- pdf_file = st.file_uploader("Choose PDF", "pdf", key="pdf_upload")
1020
- max_pages = st.slider('Pages', 1, 100, 10, key="pdf_pages")
1021
- if pdf_file:
1022
- with st.spinner('Processing...'):
1023
- texts, audios, total = process_pdf(pdf_file, max_pages, st.session_state['tts_voice'], audio_processor)
1024
- for i, text in enumerate(texts):
1025
- with st.expander(f"Page {i+1}"):
1026
- st.markdown(text)
1027
- while i not in audios:
1028
- time.sleep(0.1)
1029
- if audios.get(i):
1030
- st.audio(audios[i])
1031
- st.markdown(get_download_link(audios[i], "mp3"), unsafe_allow_html=True)
1032
- voice = FUN_USERNAMES.get(st.session_state.username, "en-US-AriaNeural")
1033
- asyncio.run(save_chat_entry(st.session_state.username, f"PDF Page {i+1} converted to audio: {audios[i]}", voice))
1034
-
1035
- elif tab_main == "๐Ÿ“œ PDF Output":
1036
- create_pdf_tab(default_markdown)
1037
-
1038
- st.header("๐Ÿ“ธ Media Gallery")
1039
- all_files = sorted(glob.glob("*.md") + glob.glob("*.mp3") + glob.glob("*.png") + glob.glob("*.mp4"), key=os.path.getmtime)
1040
- md_files = [f for f in all_files if f.endswith('.md') and os.path.basename(f) != "README.md"]
1041
- mp3_files = [f for f in all_files if f.endswith('.mp3')]
1042
- png_files = [f for f in all_files if f.endswith('.png')]
1043
- mp4_files = [f for f in all_files if f.endswith('.mp4')]
1044
-
1045
- st.subheader("All Submitted Text")
1046
- all_md_content = concatenate_markdown_files()
1047
- with st.expander("View All Markdown Content"):
1048
- st.markdown(all_md_content)
1049
-
1050
- st.subheader("๐ŸŽต Audio (MP3)")
1051
- for filename, (num, mp3) in sorted(st.session_state['mp3_files'].items(), key=lambda x: x[1][0]):
1052
- with st.expander(f"{num}. {os.path.basename(mp3)}"):
1053
- st.audio(mp3)
1054
- st.markdown(get_download_link(mp3, "mp3"), unsafe_allow_html=True)
1055
-
1056
- st.subheader("๐Ÿ–ผ๏ธ Images (PNG)")
1057
- for png in sorted(png_files, key=os.path.getmtime):
1058
- with st.expander(os.path.basename(png)):
1059
- st.image(png, use_container_width=True)
1060
- st.markdown(get_download_link(png, "png"), unsafe_allow_html=True)
1061
-
1062
- st.subheader("๐ŸŽฅ Videos (MP4)")
1063
- for mp4 in sorted(mp4_files, key=os.path.getmtime):
1064
- with st.expander(os.path.basename(mp4)):
1065
- st.video(mp4)
1066
- st.markdown(get_download_link(mp4, "mp4"), unsafe_allow_html=True)
1067
-
1068
- st.sidebar.subheader("Voice Settings")
1069
- new_username = st.sidebar.selectbox("Change Name/Voice", list(FUN_USERNAMES.keys()), index=list(FUN_USERNAMES.keys()).index(st.session_state.username), key="username_select")
1070
- if new_username != st.session_state.username:
1071
- asyncio.run(save_chat_entry("System ๐ŸŒŸ", f"{st.session_state.username} changed to {new_username}", "en-US-AriaNeural"))
1072
- st.session_state.username, st.session_state.tts_voice = new_username, FUN_USERNAMES[new_username]
1073
- st.session_state.timer_start = time.time()
1074
- save_username(st.session_state.username)
1075
- st.rerun()
1076
-
1077
- st.sidebar.markdown("### ๐Ÿ’ฌ Chat Dialog")
1078
- chat_content = asyncio.run(load_chat())
1079
- with st.sidebar.expander("Chat History"):
1080
- numbered_content = "\n".join(f"{i+1}. {line}" for i, line in enumerate(chat_content))
1081
- st.code(numbered_content, language="python")
1082
-
1083
- st.sidebar.markdown("### ๐Ÿ’ฌ Chat Text Only")
1084
- chat_text_only = get_chat_text_only()
1085
- with st.sidebar.expander("Text Only History"):
1086
- numbered_text = "\n".join(f"{i+1}. {line}" for i, line in enumerate(chat_text_only.split('\n')))
1087
- st.code(numbered_text, language="python")
1088
-
1089
- st.sidebar.subheader("Vote Totals")
1090
- chat_votes = load_votes(QUOTE_VOTES_FILE)
1091
- image_votes = load_votes(IMAGE_VOTES_FILE)
1092
- for item, count in chat_votes.items():
1093
- st.sidebar.write(f"{item}: {count} votes")
1094
- for image, count in image_votes.items():
1095
- st.sidebar.write(f"{image}: {count} votes")
1096
-
1097
- st.sidebar.markdown("### ๐Ÿ“‚ File History")
1098
- for f in all_files[:10]:
1099
- st.sidebar.write(f"{FILE_EMOJIS.get(f.split('.')[-1], '๐Ÿ“„')} {os.path.basename(f)}")
1100
-
1101
- st.sidebar.subheader("๐Ÿ“ฆ Zip & Delete")
1102
- col_zip, col_del = st.sidebar.columns(2)
1103
- with col_zip:
1104
- if st.button("โฌ‡๏ธ Zip All", key="zip_all"):
1105
- zip_name = create_zip_of_files(all_files, "All")
1106
- if zip_name:
1107
- st.session_state['download_link_cache'] = {}
1108
- if st.button("โฌ‡๏ธ Zip All MD", key="zip_md"):
1109
- zip_name = create_zip_of_files(md_files, "MD")
1110
- if zip_name:
1111
- st.session_state['download_link_cache'] = {}
1112
- if st.button("โฌ‡๏ธ Zip All MP3", key="zip_mp3"):
1113
- zip_name = create_zip_of_files(mp3_files, "MP3")
1114
- if zip_name:
1115
- st.session_state['download_link_cache'] = {}
1116
- if st.button("โฌ‡๏ธ Zip All PNG", key="zip_png"):
1117
- zip_name = create_zip_of_files(png_files, "PNG")
1118
- if zip_name:
1119
- st.session_state['download_link_cache'] = {}
1120
- if st.button("โฌ‡๏ธ Zip All MP4", key="zip_mp4"):
1121
- zip_name = create_zip_of_files(mp4_files, "MP4")
1122
- if zip_name:
1123
- st.session_state['download_link_cache'] = {}
1124
- with col_del:
1125
- if st.button("๐Ÿ—‘๏ธ Del All", key="del_all"):
1126
- for ft in ["md", "mp3", "png", "mp4"]:
1127
- delete_files(ft)
1128
- st.rerun()
1129
- if st.button("๐Ÿ—‘๏ธ Del All MD", key="del_md"):
1130
- delete_files("md")
1131
- st.rerun()
1132
- if st.button("๐Ÿ—‘๏ธ Del All MP3", key="del_mp3"):
1133
- delete_files("mp3")
1134
- st.rerun()
1135
- if st.button("๐Ÿ—‘๏ธ Del All PNG", key="del_png"):
1136
- delete_files("png")
1137
- st.rerun()
1138
- if st.button("๐Ÿ—‘๏ธ Del All MP4", key="del_mp4"):
1139
- delete_files("mp4")
1140
- st.rerun()
1141
- if st.button("๐Ÿ—‘๏ธ Del All Zip", key="del_zip"):
1142
- delete_files("zip", exclude_files=[])
1143
- st.rerun()
1144
-
1145
- zip_files = sorted(glob.glob("*.zip"), key=os.path.getmtime, reverse=True)
1146
- for zip_file in zip_files:
1147
- st.sidebar.markdown(get_download_link(zip_file, "zip"), unsafe_allow_html=True)
1148
-
1149
- st.sidebar.subheader("Set Refresh Rate โณ")
1150
- st.session_state['auto_refresh'] = st.sidebar.radio("Auto Refresh", ["On", "Off"], index=0 if st.session_state['auto_refresh'] else 1) == "On"
1151
- st.markdown("""
1152
- <style>
1153
- .timer {
1154
- font-size: 24px;
1155
- color: #ffcc00;
1156
- text-align: center;
1157
- animation: pulse 1s infinite;
1158
- }
1159
- @keyframes pulse {
1160
- 0% { transform: scale(1); }
1161
- 50% { transform: scale(1.1); }
1162
- 100% { transform: scale(1); }
1163
- }
1164
- </style>
1165
- """, unsafe_allow_html=True)
1166
-
1167
- refresh_rate = st.sidebar.slider("Refresh Rate (seconds)", min_value=1, max_value=300, value=st.session_state.refresh_rate, step=1)
1168
- if refresh_rate != st.session_state.refresh_rate:
1169
- st.session_state.refresh_rate = refresh_rate
1170
- st.session_state.timer_start = time.time()
1171
- save_username(st.session_state.username)
1172
-
1173
- col1, col2, col3 = st.sidebar.columns(3)
1174
- with col1:
1175
- if st.button("๐Ÿ‡ Small (1s)"):
1176
- st.session_state.refresh_rate = 1
1177
- st.session_state.timer_start = time.time()
1178
- save_username(st.session_state.username)
1179
- with col2:
1180
- if st.button("๐Ÿข Medium (10s)"):
1181
- st.session_state.refresh_rate = 10
1182
- st.session_state.timer_start = time.time()
1183
- save_username(st.session_state.username)
1184
- with col3:
1185
- if st.button("๐Ÿ˜ Large (5m)"):
1186
- st.session_state.refresh_rate = 300
1187
- st.session_state.timer_start = time.time()
1188
- save_username(st.session_state.username)
1189
-
1190
- timer_placeholder = st.sidebar.empty()
1191
- def update_timer():
1192
- start_time = st.session_state.timer_start
1193
- remaining_time = max(0, int(st.session_state.refresh_rate - (time.time() - start_time)))
1194
- timer_placeholder.markdown(f"<p class='timer'>โณ Next refresh in: {remaining_time} seconds</p>", unsafe_allow_html=True)
1195
- if st.session_state['auto_refresh'] and remaining_time <= 0:
1196
- st.session_state.timer_start = time.time()
1197
- st.session_state.last_refresh = time.time()
1198
- st.rerun()
1199
-
1200
- threading.Thread(target=lambda: [time.sleep(1) or update_timer() for _ in range(int(st.session_state.refresh_rate)+1)], daemon=True).start()
1201
- update_timer()
1202
-
1203
- if not st.session_state.get('server_running', False) and not st.session_state.get('server_task', None):
1204
- st.session_state.server_task = threading.Thread(target=start_websocket_server, daemon=True)
1205
- st.session_state.server_task.start()
1206
 
1207
  default_markdown = """# Deities Guide: Mythology and Moral Lessons ๐ŸŒŸโœจ
1208
 
@@ -1381,5 +348,52 @@ default_markdown = """# Deities Guide: Mythology and Moral Lessons ๐ŸŒŸโœจ
1381
  - **Saints/Prophets**: Virtues, e.g., justice โš–๏ธ and prophecy ๐Ÿ”ฎ.
1382
  """
1383
 
1384
- if __name__ == "__main__":
1385
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import io
2
  import re
 
 
3
  import os
4
+ import glob
5
+ import asyncio
6
+ import hashlib
7
+ import unicodedata
8
+ import streamlit as st
9
  from PIL import Image
10
  import fitz
11
+ import edge_tts
12
  from reportlab.lib.pagesizes import A4
13
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
14
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
15
  from reportlab.lib import colors
16
  from reportlab.pdfbase import pdfmetrics
17
  from reportlab.pdfbase.ttfonts import TTFont
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
20
 
21
async def generate_audio(text, voice):
    """Synthesize *text* to an MP3 file with edge-tts and return its name.

    The file name is built from the MD5 digest of the text plus the voice
    name, so the same text/voice pair always maps to the same file.  When a
    non-empty file with that name already exists it is reused instead of
    being synthesized again.

    Args:
        text: Text to speak.
        voice: Voice identifier passed to ``edge_tts.Communicate`` and
            embedded in the file name.

    Returns:
        The MP3 file name, relative to the current working directory.
    """
    # MD5 serves only as a content key for the file name, not for security.
    filename = f"{hashlib.md5(text.encode()).hexdigest()}_{voice}.mp3"
    # A zero-byte file is treated as a failed earlier attempt and regenerated.
    if os.path.isfile(filename) and os.path.getsize(filename) > 0:
        return filename
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(filename)
    return filename
26
 
27
# Code-point ranges that are routed to the emoji font.  Compiled once at
# import time because apply_emoji_font() is called for every PDF line.
_EMOJI_PATTERN = re.compile(
    r"([\U0001F300-\U0001F5FF"
    r"\U0001F600-\U0001F64F"
    r"\U0001F680-\U0001F6FF"
    r"\U0001F700-\U0001F77F"
    r"\U0001F780-\U0001F7FF"
    r"\U0001F800-\U0001F8FF"
    r"\U0001F900-\U0001F9FF"
    r"\U0001FA00-\U0001FA6F"
    r"\U0001FA70-\U0001FAFF"
    r"\u2600-\u26FF"
    r"\u2700-\u27BF]+)"
)


def apply_emoji_font(text, emoji_font):
    """Wrap *text* in ``<font face=...>`` tags, switching fonts for emoji runs.

    Every maximal run of characters matched by ``_EMOJI_PATTERN`` is
    NFC-normalized and wrapped in ``<font face="{emoji_font}">``; all other
    text is wrapped in ``<font face="DejaVuSans">``.  The text itself is not
    escaped, so markup already present in it (for example ``<b>``) is passed
    through unchanged.

    Args:
        text: Paragraph text, possibly containing emoji and inline markup.
        emoji_font: Font face name to use for the emoji runs.

    Returns:
        The tagged string; an empty string when *text* is empty.
    """
    segments = []
    last_pos = 0
    for match in _EMOJI_PATTERN.finditer(text):
        start, end = match.span()
        if last_pos < start:
            segments.append(f'<font face="DejaVuSans">{text[last_pos:start]}</font>')
        emoji = unicodedata.normalize('NFC', match.group(1))
        segments.append(f'<font face="{emoji_font}">{emoji}</font>')
        last_pos = end
    # Trailing non-emoji text after the last match (or the whole string).
    if last_pos < len(text):
        segments.append(f'<font face="DejaVuSans">{text[last_pos:]}</font>')
    return ''.join(segments)
56
+
57
def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers):
    """Convert markdown text into a list of paragraph markup strings.

    Blank lines and level-1 headings (``# ...``) are dropped.  Remaining
    lines are stripped and processed as follows:

    * With *render_with_bold*, ``**text**`` becomes ``<b>text</b>`` and a
      ``##``..``######`` heading loses its hash marks and is wrapped in
      ``<b>...</b>`` so it is not printed with literal ``#`` characters.
      Without it the line is kept as written.
    * With *auto_bold_numbers*, a line starting with ``<digits>. `` is
      wrapped in ``<b>...</b>`` as a whole.

    Args:
        markdown_text: Source markdown.
        render_with_bold: Convert bold markers and headings to ``<b>`` markup.
        auto_bold_numbers: Bold every numbered line.

    Returns:
        A ``(pdf_content, total_lines)`` tuple where ``total_lines`` is
        ``len(pdf_content)``.
    """
    number_pattern = re.compile(r'^\d+\.\s')
    heading_pattern = re.compile(r'^#{2,6}\s+')
    bold_pattern = re.compile(r'\*\*(.*?)\*\*')

    pdf_content = []
    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('# '):
            continue
        if render_with_bold:
            line = bold_pattern.sub(r'<b>\1</b>', line)
            heading = heading_pattern.match(line)
            if heading:
                line = f"<b>{line[heading.end():]}</b>"
        # A line matching number_pattern starts with a digit, so it cannot
        # already be wrapped in <b>...</b>; no extra guard is needed.
        if auto_bold_numbers and number_pattern.match(line):
            line = f"<b>{line}</b>"
        pdf_content.append(line)
    return pdf_content, len(pdf_content)
73
+
74
def _is_fully_bold(item):
    """Return True when *item* is a single ``<b>...</b>`` span covering all of it."""
    if not (item.startswith("<b>") and item.endswith("</b>")):
        return False
    depth = 0
    for tag in re.finditer(r"</?b>", item):
        depth += 1 if tag.group() == "<b>" else -1
        # The outer span closed before the end, e.g. "<b>A</b> and <b>B</b>".
        if depth == 0 and tag.end() != len(item):
            return False
    return depth == 0


def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns):
    """Render markdown into a multi-column, double-A4-width PDF.

    The markdown is converted with ``markdown_to_pdf_content``, the resulting
    lines are spread over *num_columns* columns in reading order, and the
    columns are laid out as one reportlab ``Table``.

    Args:
        markdown_text: Source markdown.
        base_font_size: Font size in points for ordinary lines.
        render_with_bold: Forwarded to ``markdown_to_pdf_content``.
        auto_bold_numbers: Forwarded to ``markdown_to_pdf_content``.
        enlarge_numbered: Use ``base_font_size + 1`` for bold numbered lines.
        num_columns: Number of columns; values below 1 are treated as 1.

    Returns:
        The PDF as ``bytes``, or ``None`` (after ``st.error``) when no
        ``.ttf`` file is present or font registration fails.
    """
    # Fonts are registered before any style or paragraph refers to them.
    try:
        available_font_files = glob.glob("*.ttf")
        if not available_font_files:
            st.error("No .ttf font files found in the current directory.")
            return
        bold_font_path = next((f for f in available_font_files if "NotoEmoji-Bold" in f), None)
        if bold_font_path:
            pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", bold_font_path))
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return
    # Fall back to DejaVuSans so styles never name an unregistered font.
    bold_font = "NotoEmoji-Bold" if bold_font_path else "DejaVuSans"

    num_columns = max(1, num_columns)
    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
    styles = getSampleStyleSheet()
    spacer_height = 10
    pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers)

    numbered_font_size = base_font_size + 1 if enlarge_numbered else base_font_size
    item_style = ParagraphStyle(
        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
        fontSize=base_font_size, leading=base_font_size * 1.15, spaceAfter=1
    )
    numbered_bold_style = ParagraphStyle(
        'NumberedBoldStyle', parent=styles['Normal'], fontName=bold_font,
        fontSize=numbered_font_size, leading=numbered_font_size * 1.15, spaceAfter=1
    )
    section_style = ParagraphStyle(
        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
        textColor=colors.darkblue, fontSize=base_font_size * 1.1, leading=base_font_size * 1.32, spaceAfter=2
    )

    # Fill columns left to right; the last column takes whatever remains.
    columns = [[] for _ in range(num_columns)]
    lines_per_column = total_lines / num_columns
    current_line_count = 0
    current_column = 0
    for item in pdf_content:
        if current_line_count >= lines_per_column and current_column < num_columns - 1:
            current_column += 1
            current_line_count = 0
        columns[current_column].append(item)
        current_line_count += 1

    number_pattern = re.compile(r'^\d+\.\s')
    column_cells = [[] for _ in range(num_columns)]
    for col_idx, column in enumerate(columns):
        for item in column:
            if _is_fully_bold(item):
                # Drop the outer <b>...</b>; the paragraph style carries the emphasis.
                content = item[3:-4].strip()
                style = numbered_bold_style if number_pattern.match(content) else section_style
                column_cells[col_idx].append(Paragraph(apply_emoji_font(content, bold_font), style))
            else:
                column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))

    # Pad shorter columns so zip() below yields complete table rows.
    max_cells = max(len(cells) for cells in column_cells)
    for cells in column_cells:
        cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

    story = [Spacer(1, spacer_height)]
    table_data = list(zip(*column_cells))
    # With no content there are no rows; the table is skipped rather than
    # handing reportlab an empty Table.
    if table_data:
        col_width = (page_width - 72) / num_columns
        table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(table)
    doc.build(story)
    return buffer.getvalue()
159
+
160
def pdf_to_image(pdf_bytes):
    """Rasterize each page of a PDF into a PIL image for on-screen preview.

    Args:
        pdf_bytes: The PDF document as bytes.  ``None`` or empty input
            yields ``None`` without attempting to open a document.

    Returns:
        A list with one RGB ``PIL.Image`` per page, rendered with a 2x zoom
        matrix, or ``None`` when there is no input or rendering fails (the
        failure is reported through ``st.error``).
    """
    if not pdf_bytes:
        return None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            images = []
            for page in doc:
                pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                images.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
        finally:
            # Close the document even when a page fails to render.
            doc.close()
        return images
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
  default_markdown = """# Deities Guide: Mythology and Moral Lessons ๐ŸŒŸโœจ
175
 
 
348
  - **Saints/Prophets**: Virtues, e.g., justice โš–๏ธ and prophecy ๐Ÿ”ฎ.
349
  """
350
 
351
# Discover the markdown documents in the working directory that can be
# offered for editing; the repository README is deliberately left out.
_candidate_md = glob.glob("*.md")
md_files = [path for path in _candidate_md if os.path.basename(path) != "README.md"]
# Display names are the file names without directory or extension.
md_options = [os.path.splitext(name)[0] for name in map(os.path.basename, md_files)]
353
+
354
# Sidebar: PDF layout options, the markdown editor, and text-to-speech.
with st.sidebar:
    st.markdown("### PDF Options")
    selected_md = st.selectbox(
        "Select Markdown File",
        options=md_options,
        index=0 if md_options else -1,
    )

    # Map font display name (file stem) -> path for every .ttf in the cwd.
    available_font_files = {
        os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")
    }
    font_names = list(available_font_files.keys())
    # Pre-select NotoEmoji-Bold when it is available, else the first font.
    default_font_index = (
        font_names.index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0
    )
    selected_font_name = st.selectbox(
        "Select Emoji Font", options=font_names, index=default_font_index
    )

    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
    render_with_bold = st.checkbox(
        "Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold"
    )
    auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
    enlarge_numbered = st.checkbox(
        "Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered"
    )
    num_columns = st.selectbox("Number of Columns", options=[1, 2, 3, 4, 5, 6], index=3)

    # Seed the editor content once. The previous `or not md_options` clause
    # reset the content to the default on every rerun when no markdown files
    # exist, which silently discarded whatever "Update PDF" had just stored.
    if 'markdown_content' not in st.session_state:
        st.session_state.markdown_content = default_markdown
    # When a file is selected, the file on disk is the source of truth.
    if md_options and selected_md:
        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()

    # The widget key changes with file/font/column selection so the editor is
    # re-created (and re-populated from `value`) whenever one of them changes.
    edited_markdown = st.text_area(
        "Modify the markdown content below:",
        value=st.session_state.markdown_content,
        height=300,
        key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}",
    )

    if st.button("Update PDF"):
        st.session_state.markdown_content = edited_markdown
        if md_options and selected_md:
            with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                f.write(edited_markdown)
        # `st.experimental_rerun` is deprecated and absent from recent
        # Streamlit releases; prefer `st.rerun` and only fall back to the old
        # name on installations that predate it.
        rerun = getattr(st, "rerun", None) or st.experimental_rerun
        rerun()

    st.download_button(
        label="Save Markdown",
        data=st.session_state.markdown_content,
        file_name=f"{selected_md}.md" if selected_md else "default.md",
        mime="text/markdown",
    )

    st.markdown("### Text-to-Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("Generate Audio"):
        # generate_audio is a coroutine defined elsewhere in this file; its
        # result is used here as a path to the audio file it produced.
        audio_file = asyncio.run(generate_audio(st.session_state.markdown_content, selected_voice))
        st.audio(audio_file)
        with open(audio_file, "rb") as f:
            audio_bytes = f.read()
        st.download_button(
            "Download Audio",
            data=audio_bytes,
            file_name=os.path.basename(audio_file),
            mime="audio/mpeg",
        )
386
+
387
# Build the PDF from the current markdown using the sidebar settings.
with st.spinner("Generating PDF..."):
    pdf_bytes = create_pdf(
        st.session_state.markdown_content,
        base_font_size,
        render_with_bold,
        auto_bold_numbers,
        enlarge_numbered,
        num_columns,
    )

# Show a page-by-page image preview in the main area; if rasterising failed
# (pdf_to_image returned None) or produced no pages, point the user at the
# download instead.
with st.container():
    pdf_images = pdf_to_image(pdf_bytes)
    if not pdf_images:
        st.info("Download the PDF to view it locally.")
    else:
        for page_image in pdf_images:
            st.image(page_image, use_container_width=True)

# The generated PDF is always offered for download from the sidebar.
with st.sidebar:
    st.download_button(
        label="Download PDF",
        data=pdf_bytes,
        file_name="output.pdf",
        mime="application/pdf",
    )