import streamlit as st
import streamlit.components.v1 as components
import openai
import os
import base64
import glob
import io
import json
import mistune
import pytz
import math
import requests
import sys
import time
import re
import textract
import zipfile  # New import for zipping files
import random
from datetime import datetime
from openai import ChatCompletion
from xml.etree import ElementTree as ET
from bs4 import BeautifulSoup
from collections import deque
from audio_recorder_streamlit import audio_recorder
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from templates import css, bot_template, user_template

# Page config and sidebar are declared up front so all other functions can
# see these module-level values.
# NOTE(review): the emoji string literals throughout this file appear
# mojibake'd (UTF-8 decoded with a wrong codec, with bytes dropped); the
# original characters cannot be recovered reliably, so every runtime string
# is preserved byte-for-byte.
st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
should_save = st.sidebar.checkbox("๐พ Save", value=True)


# LLM engines for ChatCompletion and Chat with files
def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
    """Stream a chat completion to the Streamlit UI and return the reply.

    Parameters:
        prompt: the user's message.
        document_section: optional supporting text; appended as an extra
            assistant turn when non-empty.
        model_choice: OpenAI chat model name.  Bug fix: this argument was
            previously ignored (the API call hard-coded 'gpt-3.5-turbo');
            it is now passed through.

    Returns:
        The concatenated streamed reply text.
    """
    conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
    conversation.append({'role': 'user', 'content': prompt})
    if len(document_section) > 0:
        conversation.append({'role': 'assistant', 'content': document_section})

    start_time = time.time()
    report = []
    res_box = st.empty()
    collected_messages = []

    openai.api_key = os.getenv('OPENAI_API_KEY')
    for chunk in openai.ChatCompletion.create(
            model=model_choice,  # bug fix: honor the caller's model choice
            messages=conversation,
            temperature=0.5,
            stream=True):
        chunk_message = chunk['choices'][0]['delta']  # incremental delta dict
        collected_messages.append(chunk_message)
        content = chunk_message.get('content')
        # Bug fix: the first streamed delta carries only a 'role' (content is
        # None); the old code appended None to `report`, every later join/len
        # raised TypeError, and a bare `except:` silently swallowed it, so the
        # live-streaming display never updated.  Guard explicitly instead.
        if content:
            report.append(content)
            result = ''.join(report).strip()
            res_box.markdown(f'*{result}*')

    full_reply_content = ''.join(m.get('content', '') for m in collected_messages)
    st.write("Elapsed time:")
    st.write(time.time() - start_time)
    readitaloud(full_reply_content)
    return full_reply_content


def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
    """Run a non-streaming completion over *prompt* plus optional *file_content*.

    Returns the assistant's reply text.  NOTE(review): assumes openai.api_key
    was already set elsewhere (e.g. by a prior chat_with_model call) — confirm.
    """
    conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
    conversation.append({'role': 'user', 'content': prompt})
    if len(file_content) > 0:
        conversation.append({'role': 'assistant', 'content': file_content})
    response = openai.ChatCompletion.create(model=model_choice, messages=conversation)
    return response['choices'][0]['message']['content']


def link_button_with_emoji(url, title, emoji_summary):
    """Render a markdown link to *url*, prefixed with a random emoji and
    the given summary text."""
    emojis = ["๐", "๐ฅ", "๐ก๏ธ", "๐ฉบ", "๐ก๏ธ", "๐ฌ", "๐", "๐งช", "๐จโโ๏ธ", "๐ฉโโ๏ธ"]
    random_emoji = random.choice(emojis)
    st.markdown(f"[{random_emoji} {emoji_summary} - {title}]({url})")


# MemGPT Paper
url_memgpt = "https://arxiv.org/abs/2310.08560"
title_memgpt = "MemGPT"
emoji_summary_memgpt = "๐ง ๐พ Memory OS"
link_button_with_emoji(url_memgpt, title_memgpt, emoji_summary_memgpt)

# AutoGen Paper
url_autogen = "https://arxiv.org/abs/2308.08155"
title_autogen = "AutoGen"
emoji_summary_autogen = "๐ค๐ Multi-Agent LLM"
link_button_with_emoji(url_autogen, title_autogen, emoji_summary_autogen)

# Whisper Paper
url_whisper = "https://arxiv.org/abs/2212.04356"
title_whisper = "Whisper"
emoji_summary_whisper = "๐๐งโ๐ Robust STT"
link_button_with_emoji(url_whisper, title_whisper, emoji_summary_whisper)

# ChatDev Paper
url_chatdev = "https://arxiv.org/pdf/2307.07924.pdf"
title_chatdev = "ChatDev"
emoji_summary_chatdev = "๐ฌ๐ป Comm. Agents"
link_button_with_emoji(url_chatdev, title_chatdev, emoji_summary_chatdev)


# 2. Paper Interface with LLM
def add_paper_buttons():
    """Render one expander per paper, each with a button that asks the LLM
    to discuss that paper's feature outline via chat_with_model."""
    # Expander for MemGPT
    with st.expander("MemGPT ๐ง ๐พ", expanded=False):
        if st.button("Discuss MemGPT Features"):
            outline_memgpt = "Memory Hierarchy, Context Paging, Self-directed Memory Updates, Memory Editing, Memory Retrieval, Semantic & Episodic Memory, Emotional Contextual Understanding"
            chat_with_model("Discuss the key features of MemGPT: " + outline_memgpt, "MemGPT")

    # Expander for AutoGen
    with st.expander("AutoGen ๐ค๐", expanded=False):
        if st.button("Explore AutoGen Multi-Agent LLM"):
            outline_autogen = "Cooperative Conversations, Combining Capabilities, Complex Task Solving, Divergent Thinking, Factuality, Highly Capable Agents, Generic Abstraction"
            chat_with_model("Explore the key features of AutoGen: " + outline_autogen, "AutoGen")

    # Expander for Whisper
    with st.expander("Whisper ๐๐งโ๐", expanded=False):
        if st.button("Learn About Whisper STT"):
            outline_whisper = "Scaling, Deep Learning Approaches, Weak Supervision, Zero-shot Transfer Learning, Accuracy & Robustness, Pre-training Techniques, Broad Environments"
            chat_with_model("Learn about the key features of Whisper: " + outline_whisper, "Whisper")

    # Expander for ChatDev
    with st.expander("ChatDev ๐ฌ๐ป", expanded=False):
        if st.button("Deep Dive into ChatDev"):
            outline_chatdev = "Effective Communication, Comprehensive Software Solutions, Diverse Social Identities, Tailored Codes, Environment Dependencies, User Manuals"
            chat_with_model("Deep dive into the features of ChatDev: " + outline_chatdev, "ChatDev")


add_paper_buttons()


def generate_filename_old(prompt, file_type):
    """Legacy filename builder: '<MMDD_HHMM>_<alnum-slug>.<ext>' (US/Central).

    Superseded by generate_filename below, which also preserves underscores.
    """
    central = pytz.timezone('US/Central')
    safe_date_time = datetime.now(central).strftime("%m%d_%H%M")  # Date and time DD-HHMM
    safe_prompt = "".join(x for x in prompt if x.isalnum())[:90]  # Limit file name size and trim whitespace
    return f"{safe_date_time}_{safe_prompt}.{file_type}"  # Return a safe file name
def generate_filename(prompt, file_type): central = pytz.timezone('US/Central') safe_date_time = datetime.now(central).strftime("%m%d_%H%M") replaced_prompt = prompt.replace(" ", "_").replace("\n", "_") safe_prompt = "".join(x for x in replaced_prompt if x.isalnum() or x == "_")[:90] return f"{safe_date_time}_{safe_prompt}.{file_type}" def transcribe_audio(file_path, model): key = os.getenv('OPENAI_API_KEY') headers = { "Authorization": f"Bearer {key}", } with open(file_path, 'rb') as f: data = {'file': f} st.write("Read file {file_path}", file_path) OPENAI_API_URL = "https://api.openai.com/v1/audio/transcriptions" response = requests.post(OPENAI_API_URL, headers=headers, files=data, data={'model': model}) if response.status_code == 200: st.write(response.json()) chatResponse = chat_with_model(response.json().get('text'), '') # ************************************* transcript = response.json().get('text') #st.write('Responses:') #st.write(chatResponse) filename = generate_filename(transcript, 'txt') #create_file(filename, transcript, chatResponse) response = chatResponse user_prompt = transcript create_file(filename, user_prompt, response, should_save) return transcript else: st.write(response.json()) st.error("Error in API call.") return None def save_and_play_audio(audio_recorder): audio_bytes = audio_recorder() if audio_bytes: filename = generate_filename("Recording", "wav") with open(filename, 'wb') as f: f.write(audio_bytes) st.audio(audio_bytes, format="audio/wav") return filename return None # Define a context dictionary to maintain the state between exec calls context = {} def create_file(filename, prompt, response, should_save=True): if not should_save: return # Extract base filename without extension base_filename, ext = os.path.splitext(filename) # Initialize the combined content combined_content = "" # Add Prompt with markdown title and emoji combined_content += "# Prompt ๐\n" + prompt + "\n\n" # Add Response with markdown title and emoji 
combined_content += "# Response ๐ฌ\n" + response + "\n\n" # Check for code blocks in the response resources = re.findall(r"```([\s\S]*?)```", response) for resource in resources: # Check if the resource contains Python code if "python" in resource.lower(): # Remove the 'python' keyword from the code block cleaned_code = re.sub(r'^\s*python', '', resource, flags=re.IGNORECASE | re.MULTILINE) # Add Code Results title with markdown and emoji combined_content += "# Code Results ๐\n" # Redirect standard output to capture it original_stdout = sys.stdout sys.stdout = io.StringIO() # Execute the cleaned Python code within the context try: exec(cleaned_code, context) code_output = sys.stdout.getvalue() combined_content += f"```\n{code_output}\n```\n\n" realtimeEvalResponse = "# Code Results ๐\n" + "```" + code_output + "```\n\n" st.write(realtimeEvalResponse) except Exception as e: combined_content += f"```python\nError executing Python code: {e}\n```\n\n" # Restore the original standard output sys.stdout = original_stdout else: # Add non-Python resources with markdown and emoji combined_content += "# Resource ๐ ๏ธ\n" + "```" + resource + "```\n\n" # Save the combined content to a Markdown file if should_save: with open(f"{base_filename}-Combined.md", 'w') as file: file.write(combined_content) def create_file_old2(filename, prompt, response, should_save=True): if not should_save: return # Step 2: Extract base filename without extension base_filename, ext = os.path.splitext(filename) # Step 3: Check if the response contains Python code has_python_code = bool(re.search(r"```python([\s\S]*?)```", response)) # Step 4: Initialize the combined content combined_content = "" # Add Prompt with markdown title and emoji combined_content += "# Prompt ๐\n" + prompt + "\n\n" # Add Response with markdown title and emoji combined_content += "# Response ๐ฌ\n" + response + "\n\n" # Check for Python code or other resources and add them with markdown title and emoji resources = 
re.findall(r"```([\s\S]*?)```", response) for resource in resources: # Check if the resource contains Python code if "python" in resource.lower(): st.markdown('# Running python.. ') # Remove the word 'python' from the beginning of the code block cleaned_code = re.sub(r'^\s*python', '', resource, flags=re.IGNORECASE | re.MULTILINE) # Add Code Results title with markdown and emoji combined_content += "# Code Results ๐\n" # Capture standard output original_stdout = sys.stdout sys.stdout = io.StringIO() # Execute cleaned Python code and capture the output try: st.markdown('# Running exec.. ') exec(cleaned_code) code_output = sys.stdout.getvalue() combined_content += f"```\n{code_output}\n```\n\n" realtimeEvalResponse = "# Code Results ๐\n" + "```" + code_output + "```\n\n" st.write(realtimeEvalResponse) st.markdown('# Completed exec.. ') except Exception as e: combined_content += f"```python\nError executing Python code: {e}\n```\n\n" st.markdown('# Error in exec.. ' + combined_content) # Restore the original standard output sys.stdout = original_stdout else: # Add Resource title with markdown and emoji for non-Python resources combined_content += "# Resource ๐ ๏ธ\n" + "```" + resource + "```\n\n" # Write the combined content into one file with open(f"{base_filename}-Combined.md", 'w') as file: file.write(combined_content) def truncate_document(document, length): return document[:length] def divide_document(document, max_length): return [document[i:i+max_length] for i in range(0, len(document), max_length)] def get_table_download_link(file_path): with open(file_path, 'r') as file: try: data = file.read() except: st.write('') return file_path b64 = base64.b64encode(data.encode()).decode() file_name = os.path.basename(file_path) ext = os.path.splitext(file_name)[1] # get the file extension if ext == '.txt': mime_type = 'text/plain' elif ext == '.py': mime_type = 'text/plain' elif ext == '.xlsx': mime_type = 'text/plain' elif ext == '.csv': mime_type = 'text/plain' elif 
ext == '.htm': mime_type = 'text/html' elif ext == '.md': mime_type = 'text/markdown' else: mime_type = 'application/octet-stream' # general binary data type href = f'{file_name}' return href def CompressXML(xml_text): root = ET.fromstring(xml_text) for elem in list(root.iter()): if isinstance(elem.tag, str) and 'Comment' in elem.tag: elem.parent.remove(elem) return ET.tostring(root, encoding='unicode', method="xml") def read_file_content(file,max_length): if file.type == "application/json": content = json.load(file) return str(content) elif file.type == "text/html" or file.type == "text/htm": content = BeautifulSoup(file, "html.parser") return content.text elif file.type == "application/xml" or file.type == "text/xml": tree = ET.parse(file) root = tree.getroot() xml = CompressXML(ET.tostring(root, encoding='unicode')) return xml elif file.type == "text/markdown" or file.type == "text/md": md = mistune.create_markdown() content = md(file.read().decode()) return content elif file.type == "text/plain": return file.getvalue().decode() else: return "" def readitaloud(result): documentHTML5='''