Spaces:
Running
Running
File size: 9,433 Bytes
0bfd27d 9ca2091 0bfd27d ddcc450 0bfd27d 42866ce 0bfd27d 42866ce 0bfd27d ddcc450 0bfd27d ddcc450 0bfd27d 42866ce ddcc450 0bfd27d 9ca2091 6f98b16 0bfd27d 6f98b16 42866ce 0bfd27d 6f98b16 0bfd27d 6f98b16 0bfd27d 9ca2091 ddcc450 9ca2091 0bfd27d 9ca2091 0bfd27d 42866ce 0bfd27d 42866ce 0bfd27d 42866ce 0bfd27d 42866ce 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 0bfd27d 9ca2091 6f98b16 9ca2091 42866ce 9ca2091 6f98b16 9ca2091 6f98b16 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 9ca2091 42866ce 0bfd27d ddcc450 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
import os
import logging
import gradio as gr
import asyncio
from dotenv import load_dotenv
from langchain_community.document_loaders import ArxivLoader # Updated import
from langchain_community.vectorstores import Chroma # Updated import
from langchain_huggingface import HuggingFaceEmbeddings # Updated import
from langchain_groq import ChatGroq
from PyPDF2 import PdfReader
from huggingface_hub import login
from groq import AsyncGroq
from langchain.docstore.document import Document
# --- Credentials -----------------------------------------------------------
# Populate the process environment via python-dotenv, then read both API keys.
load_dotenv()
HUGGING_API_KEY = os.getenv("HUGGING_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Both keys are mandatory: stop at import time if either is unset or empty.
if not (HUGGING_API_KEY and GROQ_API_KEY):
    raise ValueError("API keys for HuggingFace or Groq are missing.")

# --- Logging ---------------------------------------------------------------
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Model clients ---------------------------------------------------------
# Log in to the Hugging Face Hub before the embedding model is instantiated.
login(HUGGING_API_KEY)

# Sentence-transformers model used to embed document chunks and queries.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
# LangChain chat wrapper; not referenced by the rest of the visible file.
llm = ChatGroq(
    temperature=0,
    model_name="llama3-70b-8192",
    api_key=GROQ_API_KEY,
)
# Async Groq client used directly by the chat coroutines below.
client = AsyncGroq(api_key=GROQ_API_KEY)

# --- Shared state for the "Local PDF" tab ----------------------------------
# Vector store built from the most recently processed PDF (None until then).
pdf_vector_store = None
# Path of the PDF that `pdf_vector_store` was built from.
current_pdf_path = None
# General Chat
async def chat_with_replit(message, history):
    """Ask the Groq chat model to reply to *message* given prior turns.

    Args:
        message: The new user message.
        history: Iterable of ``(user_text, assistant_text)`` pairs, or a
            falsy value for "no prior turns". Each pair is replayed in order
            after the fixed system prompt.

    Returns:
        The text of the first completion choice, or the fixed string
        ``"Error in chat response."`` if anything raises (the error is
        logged).
    """
    try:
        conversation = [
            {"role": "system", "content": "You are an assistant answering user questions."}
        ]
        for user_msg, assistant_msg in history or []:
            conversation.extend(
                (
                    {"role": "user", "content": user_msg},
                    {"role": "assistant", "content": assistant_msg},
                )
            )
        conversation.append({"role": "user", "content": message})

        completion = await client.chat.completions.create(
            messages=conversation,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        logger.error(f"Chat error: {exc}")
        return "Error in chat response."
def chat_with_replit_sync(message, history):
    """Blocking wrapper: run ``chat_with_replit`` to completion and return its reply."""
    pending_reply = chat_with_replit(message, history)
    return asyncio.run(pending_reply)
# ArXiv Chat
async def chat_with_replit_arxiv(message, history, doi_num):
    """Answer *message* using passages retrieved from an arXiv paper.

    The paper is loaded and split with ``ArxivLoader``, embedded into a
    throwaway Chroma collection, and the three chunks most similar to
    *message* are handed to the Groq chat model as context.

    Args:
        message: The user's question.
        history: Accepted for signature compatibility with the other chat
            functions; it is not used when building the prompt.
        doi_num: The arXiv identifier (e.g. ``"2502.02523"``).

    Returns:
        The model's reply, or a short user-facing message when the
        identifier is blank, no documents are found, or an error occurs.
    """
    import uuid  # local import: only needed to name the throwaway collection

    # str(None) would otherwise be sent to arXiv as the literal query "None".
    paper_id = str(doi_num).strip() if doi_num is not None else ""
    if not paper_id:
        return "Please enter an arXiv number."

    vector_store = None
    try:
        loader = ArxivLoader(query=paper_id, load_max_docs=10)
        documents = loader.load_and_split()
        if not documents:
            return "No documents found for the provided arXiv number."
        metadata = documents[0].metadata

        # NOTE(review): without an explicit collection_name every
        # Chroma.from_documents call uses the same default collection, so
        # chunks from earlier papers/PDFs can show up in later searches.
        # A unique name per request keeps retrieval scoped to this paper.
        vector_store = Chroma.from_documents(
            documents,
            embedding_model,
            collection_name=f"arxiv-{uuid.uuid4().hex}",
        )
        results = vector_store.similarity_search(message, k=3)
        relevant_content = "\n\n".join(doc.page_content for doc in results)

        # System/context message goes first so the question is the final turn.
        messages = [
            {
                "role": "system",
                "content": (
                    f"Answer based on this arXiv paper {paper_id}.\n"
                    f"Metadata: {metadata}.\n"
                    f"Relevant Content: {relevant_content}"
                ),
            },
            {"role": "user", "content": message},
        ]
        response = await client.chat.completions.create(
            messages=messages,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as exc:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Error in chat with ArXiv PDF: %s", exc)
        return "Error processing chat with arXiv paper."
    finally:
        # The collection is only needed for this one question; drop it so
        # repeated questions do not accumulate collections in memory.
        if vector_store is not None:
            try:
                vector_store.delete_collection()
            except Exception:
                logger.warning("Could not delete temporary arXiv collection.", exc_info=True)
def chat_with_replit_arxiv_sync(message, history, doi_num):
    """Blocking wrapper: run ``chat_with_replit_arxiv`` to completion and return its reply."""
    pending_reply = chat_with_replit_arxiv(message, history, doi_num)
    return asyncio.run(pending_reply)
# Local PDF Chat
async def chat_with_replit_local_pdf(message, vector_store):
    """Answer *message* using passages retrieved from an uploaded PDF.

    Args:
        message: The user's question.
        vector_store: The vector store built from the uploaded PDF, or
            ``None`` if no PDF has been processed yet.

    Returns:
        The model's reply; a prompt to upload a PDF when *vector_store* is
        ``None``; or a fixed error string if retrieval or the model call
        raises (the exception is logged with its traceback).
    """
    # Explicit None check: "no store yet" is the only state meant here.
    if vector_store is None:
        return "Please upload a PDF first and wait for processing to complete."
    try:
        results = vector_store.similarity_search(message, k=3)
        relevant_content = "\n\n".join(doc.page_content for doc in results)

        # System/context message goes first so the question is the final turn.
        messages = [
            {
                "role": "system",
                "content": f"Answer based on the uploaded PDF.\nRelevant Content: {relevant_content}",
            },
            {"role": "user", "content": message},
        ]
        response = await client.chat.completions.create(
            messages=messages,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as exc:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Error in chat with local PDF: %s", exc)
        return "Error processing chat with local PDF."
def process_pdf(pdf_file):
    """Extract text from an uploaded PDF and index it for question answering.

    On success the module-level ``pdf_vector_store`` and ``current_pdf_path``
    are replaced; on any failure they are left as they were.

    Args:
        pdf_file: The value delivered by the upload component: a file path,
            an object exposing the path as ``.name``, or ``None`` when the
            upload is empty/cleared.

    Returns:
        A human-readable status string for the status textbox.
    """
    global pdf_vector_store, current_pdf_path
    import uuid  # local import: only needed to name the per-PDF collection

    # An empty/cleared upload used to fall through to "already processed"
    # (None == None) or crash inside PdfReader; report it plainly instead.
    if pdf_file is None:
        return "Please upload a PDF file."

    # Normalise to a plain path string so it can be compared with
    # current_pdf_path and stored as metadata.
    # NOTE(review): the `.name` fallback assumes a tempfile-like wrapper, as
    # some Gradio versions pass — confirm against the installed version.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = getattr(pdf_file, "name", pdf_file)

    try:
        if pdf_path == current_pdf_path:
            return "PDF already processed. Ask away!"

        logger.info("Extracting text from PDF...")
        reader = PdfReader(pdf_path)
        # One Document per page: indexing the whole PDF as a single document
        # makes every similarity search return the entire text as "context".
        documents = []
        for page_number, page in enumerate(reader.pages, start=1):
            page_text = page.extract_text() or ""
            if page_text.strip():
                documents.append(
                    Document(
                        page_content=page_text,
                        metadata={"source": pdf_path, "page": page_number},
                    )
                )
        if not documents:
            return "Could not extract text from PDF."

        logger.info("Creating vector store...")
        previous_store = pdf_vector_store
        # NOTE(review): without an explicit collection_name every
        # Chroma.from_documents call uses the same default collection, so a
        # new upload would be mixed with earlier PDFs/papers. A unique name
        # keeps each upload isolated.
        pdf_vector_store = Chroma.from_documents(
            documents,
            embedding_model,
            collection_name=f"pdf-{uuid.uuid4().hex}",
        )
        current_pdf_path = pdf_path

        # Best-effort release of the store this upload replaced; a failure
        # here must not turn a successful upload into an error.
        if previous_store is not None:
            try:
                previous_store.delete_collection()
            except Exception:
                logger.warning("Could not delete previous PDF collection.", exc_info=True)

        return "PDF processed successfully. You can now ask questions."
    except Exception as exc:
        logger.exception("Error processing PDF: %s", exc)
        return f"Error processing PDF: {exc}"
# Gradio UI
def _format_history(history):
    """Render ``[user, assistant]`` turns as the Markdown transcript shown in the UI."""
    return "\n\n".join(f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history)


def _queue_user_message(user_message, history):
    """Return a copy of *history* with *user_message* appended as an unanswered turn.

    The second return value is the Markdown rendering for the output
    component (a ``gr.Markdown`` expects a string, not the raw list).
    """
    updated = [list(turn) for turn in (history or [])]
    updated.append([user_message, ""])
    return updated, _format_history(updated)


def _answer_last_turn(history, reply_fn):
    """Fill in the assistant half of the last turn using *reply_fn*.

    ``reply_fn(user_message, prior_turns)`` must return the reply text.
    Returns the updated history and its Markdown rendering. An empty history
    is returned unchanged instead of raising ``IndexError``.
    """
    if not history:
        return [], ""
    updated = [list(turn) for turn in history]
    user_message = updated[-1][0]
    updated[-1][1] = reply_fn(user_message, updated[:-1])
    return updated, _format_history(updated)


with gr.Blocks() as app:
    with gr.Tab(label="General Chat"):
        gr.Markdown("### Chat with the Assistant")
        with gr.Row():
            general_chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
            general_send_button = gr.Button("Send")
        general_chat_output = gr.Markdown(label="Chat Output")
        general_chat_history = gr.State([])

        def update_general_chat(user_message, history):
            """Record the user's message and show it immediately."""
            return _queue_user_message(user_message, history)

        def update_general_response(history):
            """Ask the general chat model to answer the most recent message."""
            return _answer_last_turn(history, chat_with_replit_sync)

        # Chain with .then(): two separate .click() registrations start
        # independently, so the response handler could run before the new
        # message had been written to the history state.
        general_send_button.click(
            update_general_chat,
            inputs=[general_chat_input, general_chat_history],
            outputs=[general_chat_history, general_chat_output],
        ).then(
            update_general_response,
            inputs=general_chat_history,
            outputs=[general_chat_history, general_chat_output],
        )

    with gr.Tab(label="Chat with ArXiv Paper"):
        gr.Markdown("### Ask Questions About an ArXiv Paper")
        with gr.Row():
            arxiv_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
            arxiv_doi = gr.Textbox(placeholder="Enter arXiv number, e.g. 2502.02523", label="ArXiv Number")
            arxiv_send_button = gr.Button("Send")
        arxiv_chat_output = gr.Markdown(label="Chat Output")
        arxiv_chat_history = gr.State([])

        def update_arxiv_chat(user_message, history):
            """Record the user's question and show it immediately."""
            return _queue_user_message(user_message, history)

        def update_arxiv_response(history, doi_num):
            """Answer the most recent question from the given arXiv paper."""
            return _answer_last_turn(
                history,
                lambda msg, prior: chat_with_replit_arxiv_sync(msg, prior, doi_num),
            )

        arxiv_send_button.click(
            update_arxiv_chat,
            inputs=[arxiv_input, arxiv_chat_history],
            outputs=[arxiv_chat_history, arxiv_chat_output],
        ).then(
            update_arxiv_response,
            inputs=[arxiv_chat_history, arxiv_doi],
            outputs=[arxiv_chat_history, arxiv_chat_output],
        )

    with gr.Tab(label="Chat with Local PDF"):
        gr.Markdown("### Ask Questions About an Uploaded PDF")
        pdf_file_input = gr.File(label="Upload PDF file", file_types=[".pdf"])
        pdf_status = gr.Textbox(label="PDF Processing Status", interactive=False)
        with gr.Row():
            pdf_chat_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
            pdf_send_button = gr.Button("Send")
        pdf_chat_output = gr.Markdown(label="Chat Output")
        pdf_chat_history = gr.State([])

        def update_pdf_chat(user_message, history):
            """Record the user's question and show it immediately."""
            return _queue_user_message(user_message, history)

        def update_pdf_response(history):
            """Answer the most recent question from the processed PDF."""
            # pdf_vector_store is read when the handler runs, so it reflects
            # the most recently processed upload.
            return _answer_last_turn(
                history,
                lambda msg, _prior: asyncio.run(chat_with_replit_local_pdf(msg, pdf_vector_store)),
            )

        pdf_file_input.change(process_pdf, inputs=pdf_file_input, outputs=pdf_status)
        pdf_send_button.click(
            update_pdf_chat,
            inputs=[pdf_chat_input, pdf_chat_history],
            outputs=[pdf_chat_history, pdf_chat_output],
        ).then(
            update_pdf_response,
            inputs=pdf_chat_history,
            outputs=[pdf_chat_history, pdf_chat_output],
        )

app.launch()