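"""Gradio app for chatting with a Groq-hosted Llama 3 model.

Three tabs: general chat, Q&A over an arXiv paper, and Q&A over an uploaded PDF.
Retrieval uses Chroma with a local sentence-transformers embedding model.
"""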
import os
import logging
import gradio as gr
import asyncio
from dotenv import load_dotenv
from langchain_community.document_loaders import ArxivLoader # Updated import
from langchain_community.vectorstores import Chroma # Updated import
from langchain_huggingface import HuggingFaceEmbeddings # Updated import
from langchain_groq import ChatGroq
from PyPDF2 import PdfReader
from huggingface_hub import login
from groq import AsyncGroq
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Load environment variables
load_dotenv()
HUGGING_API_KEY = os.getenv("HUGGING_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not HUGGING_API_KEY or not GROQ_API_KEY:
    raise ValueError("API keys for HuggingFace or Groq are missing.")
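# Expected .env contents (names must match the os.getenv calls above):
#   HUGGING_API_KEY=<your Hugging Face access token>
#   GROQ_API_KEY=<your Groq API key>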
# Configure Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Authenticate with Hugging Face (for model downloads)
login(HUGGING_API_KEY)
# Load models and embeddings with a local embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
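# Note: the ChatGroq LangChain wrapper below is defined but never used;
# all completions in this file go through the AsyncGroq client instead.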
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)
client = AsyncGroq(api_key=GROQ_API_KEY)
# Global state for PDF vector store
pdf_vector_store = None
current_pdf_path = None
# General Chat
async def chat_with_replit(message, history):
    try:
        messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
        # Replay prior turns so the model sees the full conversation.
        for user_msg, assistant_msg in history or []:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})
        response = await client.chat.completions.create(
            messages=messages, model="llama3-70b-8192", temperature=0, max_tokens=1024, top_p=1, stream=False
        )
        return response.choices[0].message.content
    except Exception as e:
        logger.error(f"Chat error: {e}")
        return "Error in chat response."


def chat_with_replit_sync(message, history):
    return asyncio.run(chat_with_replit(message, history))
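# Example call (hypothetical values); history is a list of [user, assistant] pairs:
#   chat_with_replit_sync("What model are you?", [["Hi", "Hello! How can I help?"]])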
# ArXiv Chat
async def chat_with_replit_arxiv(message, history, doi_num):
    # history is accepted for interface symmetry with the other chats but is unused here.
    try:
        loader = ArxivLoader(query=str(doi_num), load_max_docs=10)
        documents = loader.load_and_split()
        if not documents:
            return "No documents found for the provided arXiv number."
        metadata = documents[0].metadata
        vector_store = Chroma.from_documents(documents, embedding_model)
        results = vector_store.similarity_search(message, k=3)
        relevant_content = "\n\n".join(doc.page_content for doc in results)
        # Grounding instructions go in the system message, ahead of the user question.
        messages = [
            {"role": "system", "content": f"Answer based on this arXiv paper {doi_num}.\nMetadata: {metadata}.\nRelevant Content: {relevant_content}"},
            {"role": "user", "content": message},
        ]
        response = await client.chat.completions.create(
            messages=messages, model="llama3-70b-8192", temperature=0, max_tokens=1024, top_p=1, stream=False
        )
        return response.choices[0].message.content
    except Exception as e:
        logger.error(f"Error in chat with ArXiv PDF: {e}")
        return "Error processing chat with arXiv paper."


def chat_with_replit_arxiv_sync(message, history, doi_num):
    return asyncio.run(chat_with_replit_arxiv(message, history, doi_num))
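# Note: ArxivLoader assumes the `arxiv` and `pymupdf` packages are installed
# in this deployment. Example call (hypothetical question, ID from the UI placeholder):
#   chat_with_replit_arxiv_sync("What is the main result?", [], "2502.02523")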
# Local PDF Chat
async def chat_with_replit_local_pdf(message, vector_store):
    try:
        if not vector_store:
            return "Please upload a PDF first and wait for processing to complete."
        results = vector_store.similarity_search(message, k=3)
        relevant_content = "\n\n".join(doc.page_content for doc in results)
        # Grounding instructions go in the system message, ahead of the user question.
        messages = [
            {"role": "system", "content": f"Answer based on the uploaded PDF.\nRelevant Content: {relevant_content}"},
            {"role": "user", "content": message},
        ]
        response = await client.chat.completions.create(
            messages=messages, model="llama3-70b-8192", temperature=0, max_tokens=1024, top_p=1, stream=False
        )
        return response.choices[0].message.content
    except Exception as e:
        logger.error(f"Error in chat with local PDF: {e}")
        return "Error processing chat with local PDF."
def process_pdf(pdf_file):
    global pdf_vector_store, current_pdf_path
    try:
        # Assumption: gr.File yields a filepath string (the Gradio 4 default).
        if pdf_file != current_pdf_path:
            logger.info("Extracting text from PDF...")
            reader = PdfReader(pdf_file)
            text = "\n".join(page.extract_text() or "" for page in reader.pages)
            if not text.strip():
                return "Could not extract text from PDF."
            # Split into chunks so similarity_search has more than one candidate;
            # a single whole-document entry would always return the entire PDF.
            splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
            documents = [
                Document(page_content=chunk, metadata={"source": pdf_file})
                for chunk in splitter.split_text(text)
            ]
            logger.info("Creating vector store...")
            pdf_vector_store = Chroma.from_documents(documents, embedding_model)
            current_pdf_path = pdf_file
            return "PDF processed successfully. You can now ask questions."
        return "PDF already processed. Ask away!"
    except Exception as e:
        logger.error(f"Error processing PDF: {e}")
        return f"Error processing PDF: {str(e)}"
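# Note: Chroma.from_documents with no persist_directory builds an ephemeral
# in-memory collection, so uploading a new PDF simply replaces the old store.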
# Gradio UI
def format_history(history):
    """Render [user, assistant] pairs as a Markdown transcript."""
    return "\n\n".join(f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history)


with gr.Blocks() as app:
    with gr.Tab(label="General Chat"):
        gr.Markdown("### Chat with the Assistant")
        with gr.Row():
            general_chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
            general_send_button = gr.Button("Send")
        general_chat_output = gr.Markdown(label="Chat Output")
        general_chat_history = gr.State([])

        def update_general_chat(user_message, history):
            # Show the user's message immediately; the reply is filled in by .then().
            history = history or []
            history.append([user_message, ""])
            return history, format_history(history)

        def update_general_response(history):
            user_message = history[-1][0]
            response = chat_with_replit_sync(user_message, history[:-1])
            history[-1][1] = response
            return history, format_history(history)

        # Chain the two handlers with .then() so the response step runs after the
        # history update; two separate .click() bindings on one button can race.
        general_send_button.click(
            update_general_chat,
            inputs=[general_chat_input, general_chat_history],
            outputs=[general_chat_history, general_chat_output],
        ).then(
            update_general_response,
            inputs=general_chat_history,
            outputs=[general_chat_history, general_chat_output],
        )
    with gr.Tab(label="Chat with ArXiv Paper"):
        gr.Markdown("### Ask Questions About an ArXiv Paper")
        with gr.Row():
            arxiv_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
            arxiv_doi = gr.Textbox(placeholder="Enter arXiv number, e.g. 2502.02523", label="ArXiv Number")
            arxiv_send_button = gr.Button("Send")
        arxiv_chat_output = gr.Markdown(label="Chat Output")
        arxiv_chat_history = gr.State([])

        def update_arxiv_chat(user_message, history):
            history = history or []
            history.append([user_message, ""])
            return history, format_history(history)

        def update_arxiv_response(history, doi_num):
            user_message = history[-1][0]
            response = chat_with_replit_arxiv_sync(user_message, history[:-1], doi_num)
            history[-1][1] = response
            return history, format_history(history)

        # Same chained pattern as the general tab: append first, then answer.
        arxiv_send_button.click(
            update_arxiv_chat,
            inputs=[arxiv_input, arxiv_chat_history],
            outputs=[arxiv_chat_history, arxiv_chat_output],
        ).then(
            update_arxiv_response,
            inputs=[arxiv_chat_history, arxiv_doi],
            outputs=[arxiv_chat_history, arxiv_chat_output],
        )
    with gr.Tab(label="Chat with Local PDF"):
        gr.Markdown("### Ask Questions About an Uploaded PDF")
        pdf_file_input = gr.File(label="Upload PDF file", file_types=[".pdf"])
        pdf_status = gr.Textbox(label="PDF Processing Status", interactive=False)
        with gr.Row():
            pdf_chat_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
            pdf_send_button = gr.Button("Send")
        pdf_chat_output = gr.Markdown(label="Chat Output")
        pdf_chat_history = gr.State([])

        def update_pdf_chat(user_message, history):
            history = history or []
            history.append([user_message, ""])
            return history, format_history(history)

        def update_pdf_response(history):
            user_message = history[-1][0]
            response = asyncio.run(chat_with_replit_local_pdf(user_message, pdf_vector_store))
            history[-1][1] = response
            return history, format_history(history)

        # Build the vector store as soon as a file is uploaded.
        pdf_file_input.change(process_pdf, inputs=pdf_file_input, outputs=pdf_status)
        pdf_send_button.click(
            update_pdf_chat,
            inputs=[pdf_chat_input, pdf_chat_history],
            outputs=[pdf_chat_history, pdf_chat_output],
        ).then(
            update_pdf_response,
            inputs=pdf_chat_history,
            outputs=[pdf_chat_history, pdf_chat_output],
        )

app.launch()
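# Assumption: the default launch serves on http://127.0.0.1:7860 when run
# locally; pass share=True to app.launch() for a temporary public link.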