Spaces:
Running
Running
import os | |
import logging | |
import gradio as gr | |
import asyncio | |
from dotenv import load_dotenv | |
from langchain_community.document_loaders import ArxivLoader # Updated import | |
from langchain_community.vectorstores import Chroma # Updated import | |
from langchain_huggingface import HuggingFaceEmbeddings # Updated import | |
from langchain_groq import ChatGroq | |
from PyPDF2 import PdfReader | |
from huggingface_hub import login | |
from groq import AsyncGroq | |
from langchain.docstore.document import Document | |
# --- Environment -------------------------------------------------------------
# Read a .env file (if present) and pick up the two credentials the app needs.
load_dotenv()
HUGGING_API_KEY = os.getenv("HUGGING_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Both keys are mandatory: stop at import time instead of failing on first use.
if not (HUGGING_API_KEY and GROQ_API_KEY):
    raise ValueError("API keys for HuggingFace or Groq are missing.")

# --- Logging -----------------------------------------------------------------
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Hugging Face authentication (for model downloads) -----------------------
login(HUGGING_API_KEY)

# --- Models ------------------------------------------------------------------
# Embedding model used to build the Chroma vector stores.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
# LangChain chat wrapper and the raw async Groq client share the same API key.
llm = ChatGroq(
    temperature=0,
    model_name="llama3-70b-8192",
    api_key=GROQ_API_KEY,
)
client = AsyncGroq(api_key=GROQ_API_KEY)

# --- Global state for the uploaded-PDF tab -----------------------------------
# Vector store built from the most recently processed PDF (None until then).
pdf_vector_store = None
# Identifier of the PDF that `pdf_vector_store` was built from.
current_pdf_path = None
# General Chat | |
async def chat_with_replit(message, history):
    """Answer *message* with the Groq chat model, using *history* as context.

    Args:
        message: The new user message.
        history: Iterable of ``(user_msg, assistant_msg)`` pairs from earlier
            turns; ``None`` or an empty value means no prior context.

    Returns:
        The assistant's reply text, or ``"Error in chat response."`` when
        anything goes wrong (the error is logged, never raised).
    """
    try:
        # The system prompt always leads the conversation.
        conversation = [
            {"role": "system", "content": "You are an assistant answering user questions."}
        ]
        # Replay each earlier turn as a user/assistant message pair.
        for user_msg, assistant_msg in history or []:
            conversation.extend(
                (
                    {"role": "user", "content": user_msg},
                    {"role": "assistant", "content": assistant_msg},
                )
            )
        conversation.append({"role": "user", "content": message})

        completion = await client.chat.completions.create(
            messages=conversation,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return completion.choices[0].message.content
    except Exception as e:
        logger.error(f"Chat error: {e}")
        return "Error in chat response."
def chat_with_replit_sync(message, history):
    """Blocking wrapper around :func:`chat_with_replit` for synchronous callers.

    Runs the coroutine to completion with ``asyncio.run`` and returns its
    result unchanged.
    """
    # NOTE(review): every call starts a fresh event loop while the Groq client
    # is created once at module level -- confirm the client tolerates being
    # reused across event loops.
    pending_reply = chat_with_replit(message, history)
    return asyncio.run(pending_reply)
# ArXiv Chat | |
async def chat_with_replit_arxiv(message, history, doi_num):
    """Answer *message* using content retrieved from an arXiv paper.

    The paper is loaded with ``ArxivLoader``, split into chunks, indexed in a
    temporary Chroma collection, and the three chunks most similar to
    *message* are passed to the Groq chat model as context.

    Args:
        message: The user's question.
        history: Earlier turns. Accepted for interface symmetry with
            :func:`chat_with_replit`; it is not sent to the model.
        doi_num: arXiv identifier (or query) entered by the user.

    Returns:
        The assistant's reply, a short notice when no identifier was given or
        no documents were found, or ``"Error processing chat with arXiv
        paper."`` on failure (the error is logged, never raised).
    """
    import uuid

    # Reject a missing/blank identifier up front instead of querying arXiv
    # with "" or the literal string "None".
    paper_id = str(doi_num or "").strip()
    if not paper_id:
        return "Please enter an arXiv number."

    vector_store = None
    try:
        loader = ArxivLoader(query=paper_id, load_max_docs=10)
        documents = loader.load_and_split()
        if not documents:
            return "No documents found for the provided arXiv number."
        metadata = documents[0].metadata

        # Use a per-request collection: the default collection name is shared
        # by every Chroma store created in this process, so chunks from
        # earlier papers (or the uploaded PDF) would otherwise leak into the
        # similarity search.
        vector_store = Chroma.from_documents(
            documents,
            embedding_model,
            collection_name=f"arxiv-{uuid.uuid4().hex}",
        )
        results = vector_store.similarity_search(message, k=3)
        relevant_content = "\n\n".join(doc.page_content for doc in results)

        # System instructions go first, matching chat_with_replit.
        messages = [
            {
                "role": "system",
                "content": (
                    f"Answer based on this arXiv paper {paper_id}.\n"
                    f"Metadata: {metadata}.\n"
                    f"Relevant Content: {relevant_content}"
                ),
            },
            {"role": "user", "content": message},
        ]
        response = await client.chat.completions.create(
            messages=messages,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as e:
        # logger.exception keeps the traceback for debugging.
        logger.exception("Error in chat with ArXiv PDF: %s", e)
        return "Error processing chat with arXiv paper."
    finally:
        # Drop the temporary collection so repeated questions do not
        # accumulate embeddings in memory.
        if vector_store is not None:
            try:
                vector_store.delete_collection()
            except Exception:
                logger.warning(
                    "Could not delete temporary arXiv collection.", exc_info=True
                )
def chat_with_replit_arxiv_sync(message, history, doi_num):
    """Blocking wrapper around :func:`chat_with_replit_arxiv`.

    Runs the coroutine to completion with ``asyncio.run`` and returns its
    result unchanged.
    """
    pending_reply = chat_with_replit_arxiv(message, history, doi_num)
    return asyncio.run(pending_reply)
# Local PDF Chat | |
async def chat_with_replit_local_pdf(message, vector_store):
    """Answer *message* using content retrieved from an uploaded PDF.

    Args:
        message: The user's question.
        vector_store: Vector store built from the uploaded PDF, or ``None``
            when no PDF has been processed yet.

    Returns:
        The assistant's reply, a prompt to upload a PDF when *vector_store* is
        ``None``, or ``"Error processing chat with local PDF."`` on failure
        (the error is logged, never raised).
    """
    # Explicit None check: "no store yet" is the only empty case, and it must
    # not depend on how the store object defines truthiness.
    if vector_store is None:
        return "Please upload a PDF first and wait for processing to complete."

    try:
        results = vector_store.similarity_search(message, k=3)
        relevant_content = "\n\n".join(doc.page_content for doc in results)

        # System instructions go first, matching chat_with_replit.
        messages = [
            {
                "role": "system",
                "content": (
                    "Answer based on the uploaded PDF.\n"
                    f"Relevant Content: {relevant_content}"
                ),
            },
            {"role": "user", "content": message},
        ]
        response = await client.chat.completions.create(
            messages=messages,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as e:
        # logger.exception keeps the traceback for debugging.
        logger.exception("Error in chat with local PDF: %s", e)
        return "Error processing chat with local PDF."
def process_pdf(pdf_file):
    """Extract text from an uploaded PDF and index it for question answering.

    On success the module-level ``pdf_vector_store`` and ``current_pdf_path``
    are replaced. Each page with extractable text becomes its own document so
    that similarity search can return the relevant pages instead of the whole
    file.

    Args:
        pdf_file: Value delivered by the upload component: a path string, an
            object exposing the path as ``.name``, or ``None`` when the
            upload was cleared.

    Returns:
        A human-readable status string for the UI. Errors are reported in the
        string (and logged), never raised.
    """
    global pdf_vector_store, current_pdf_path
    import uuid

    # The change event also fires when the file is cleared.
    if pdf_file is None:
        return "Please upload a PDF file."

    # NOTE(review): presumably gr.File passes either a path string or a
    # temp-file wrapper with `.name`, depending on the Gradio version --
    # confirm. Normalising to str also keeps the metadata value a plain string.
    pdf_path = str(getattr(pdf_file, "name", pdf_file))
    if pdf_path == current_pdf_path:
        return "PDF already processed. Ask away!"

    try:
        logger.info("Extracting text from PDF...")
        reader = PdfReader(pdf_path)
        documents = []
        for page_number, page in enumerate(reader.pages, start=1):
            page_text = page.extract_text() or ""
            if page_text.strip():
                documents.append(
                    Document(
                        page_content=page_text,
                        metadata={"source": pdf_path, "page": page_number},
                    )
                )
        if not documents:
            return "Could not extract text from PDF."

        logger.info("Creating vector store...")
        # A unique collection per upload: the default collection name is
        # shared by every Chroma store in this process, so a second PDF would
        # otherwise be mixed with the first one.
        new_store = Chroma.from_documents(
            documents,
            embedding_model,
            collection_name=f"pdf-{uuid.uuid4().hex}",
        )
        previous_store = pdf_vector_store
        pdf_vector_store = new_store
        current_pdf_path = pdf_path

        # Free the embeddings of the PDF that was just replaced.
        if previous_store is not None:
            try:
                previous_store.delete_collection()
            except Exception:
                logger.warning(
                    "Could not delete previous PDF collection.", exc_info=True
                )
        return "PDF processed successfully. You can now ask questions."
    except Exception as e:
        # logger.exception keeps the traceback for debugging.
        logger.exception("Error processing PDF: %s", e)
        return f"Error processing PDF: {str(e)}"
# Gradio UI | |
def _render_transcript(history):
    """Render ``[user, assistant]`` pairs as the Markdown transcript shown in the UI."""
    return "\n\n".join(
        f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history
    )


def _queue_user_message(user_message, history):
    """Append a pending turn for *user_message*; return ``(state, markdown)``."""
    history = list(history or [])
    history.append([user_message, ""])
    # The second output is a Markdown component, so send rendered text rather
    # than the raw list.
    return history, _render_transcript(history)


def _complete_last_turn(history, answer_fn):
    """Fill in the assistant side of the latest turn; return ``(state, markdown)``.

    ``answer_fn(user_message, earlier_turns)`` must return the reply text.
    """
    if not history:
        # Nothing was queued (e.g. state was reset); avoid an IndexError.
        return history, ""
    user_message = history[-1][0]
    history[-1][1] = answer_fn(user_message, history[:-1])
    return history, _render_transcript(history)


# NOTE(review): the original indentation was lost, so which components sit
# inside each gr.Row() is reconstructed -- confirm the intended layout.
with gr.Blocks() as app:
    with gr.Tab(label="General Chat"):
        gr.Markdown("### Chat with the Assistant")
        with gr.Row():
            general_chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
            general_send_button = gr.Button("Send")
        general_chat_output = gr.Markdown(label="Chat Output")
        general_chat_history = gr.State([])

        def update_general_chat(user_message, history):
            """Record the user's message and show it immediately."""
            return _queue_user_message(user_message, history)

        def update_general_response(history):
            """Ask the assistant to answer the most recent message."""
            return _complete_last_turn(history, chat_with_replit_sync)

        # Chain the two steps: the reply handler must only run after the
        # message has been stored in the state. Two independent click
        # listeners give no such ordering guarantee.
        general_send_button.click(
            update_general_chat,
            inputs=[general_chat_input, general_chat_history],
            outputs=[general_chat_history, general_chat_output],
        ).then(
            update_general_response,
            inputs=general_chat_history,
            outputs=[general_chat_history, general_chat_output],
        )

    with gr.Tab(label="Chat with ArXiv Paper"):
        gr.Markdown("### Ask Questions About an ArXiv Paper")
        with gr.Row():
            arxiv_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
            arxiv_doi = gr.Textbox(placeholder="Enter arXiv number, e.g. 2502.02523", label="ArXiv Number")
            arxiv_send_button = gr.Button("Send")
        arxiv_chat_output = gr.Markdown(label="Chat Output")
        arxiv_chat_history = gr.State([])

        def update_arxiv_chat(user_message, history):
            """Record the user's question and show it immediately."""
            return _queue_user_message(user_message, history)

        def update_arxiv_response(history, doi_num):
            """Answer the most recent question from the given arXiv paper."""

            def answer(user_message, earlier_turns):
                return chat_with_replit_arxiv_sync(user_message, earlier_turns, doi_num)

            return _complete_last_turn(history, answer)

        arxiv_send_button.click(
            update_arxiv_chat,
            inputs=[arxiv_input, arxiv_chat_history],
            outputs=[arxiv_chat_history, arxiv_chat_output],
        ).then(
            update_arxiv_response,
            inputs=[arxiv_chat_history, arxiv_doi],
            outputs=[arxiv_chat_history, arxiv_chat_output],
        )

    with gr.Tab(label="Chat with Local PDF"):
        gr.Markdown("### Ask Questions About an Uploaded PDF")
        pdf_file_input = gr.File(label="Upload PDF file", file_types=[".pdf"])
        pdf_status = gr.Textbox(label="PDF Processing Status", interactive=False)
        with gr.Row():
            pdf_chat_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
            pdf_send_button = gr.Button("Send")
        pdf_chat_output = gr.Markdown(label="Chat Output")
        pdf_chat_history = gr.State([])

        def update_pdf_chat(user_message, history):
            """Record the user's question and show it immediately."""
            return _queue_user_message(user_message, history)

        def update_pdf_response(history):
            """Answer the most recent question from the processed PDF."""

            def answer(user_message, _earlier_turns):
                # The module-level store is looked up at call time, so the
                # most recently processed PDF is the one that gets queried.
                return asyncio.run(
                    chat_with_replit_local_pdf(user_message, pdf_vector_store)
                )

            return _complete_last_turn(history, answer)

        pdf_file_input.change(process_pdf, inputs=pdf_file_input, outputs=pdf_status)
        pdf_send_button.click(
            update_pdf_chat,
            inputs=[pdf_chat_input, pdf_chat_history],
            outputs=[pdf_chat_history, pdf_chat_output],
        ).then(
            update_pdf_response,
            inputs=pdf_chat_history,
            outputs=[pdf_chat_history, pdf_chat_output],
        )

app.launch()