File size: 9,433 Bytes
0bfd27d
 
 
9ca2091
0bfd27d
ddcc450
 
 
0bfd27d
 
 
42866ce
 
0bfd27d
 
 
 
 
 
 
42866ce
0bfd27d
 
 
 
 
ddcc450
0bfd27d
 
ddcc450
 
0bfd27d
 
 
42866ce
 
 
 
ddcc450
0bfd27d
 
 
9ca2091
6f98b16
 
 
0bfd27d
6f98b16
42866ce
0bfd27d
6f98b16
0bfd27d
 
6f98b16
0bfd27d
9ca2091
 
 
ddcc450
9ca2091
0bfd27d
 
 
9ca2091
 
0bfd27d
 
42866ce
 
0bfd27d
 
42866ce
0bfd27d
 
42866ce
0bfd27d
 
 
42866ce
9ca2091
 
 
 
 
42866ce
 
9ca2091
42866ce
 
 
 
9ca2091
 
42866ce
9ca2091
 
42866ce
9ca2091
 
 
 
 
 
42866ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ca2091
42866ce
0bfd27d
9ca2091
6f98b16
 
9ca2091
 
42866ce
9ca2091
 
 
6f98b16
 
 
 
9ca2091
6f98b16
 
 
 
 
 
9ca2091
42866ce
9ca2091
42866ce
9ca2091
 
 
 
 
 
 
42866ce
9ca2091
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42866ce
9ca2091
42866ce
9ca2091
 
 
42866ce
 
9ca2091
 
 
42866ce
9ca2091
 
 
 
 
 
 
42866ce
9ca2091
42866ce
9ca2091
 
 
 
42866ce
9ca2091
42866ce
 
 
0bfd27d
ddcc450
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
import os
import logging
import gradio as gr
import asyncio
from dotenv import load_dotenv
from langchain_community.document_loaders import ArxivLoader  # Updated import
from langchain_community.vectorstores import Chroma  # Updated import
from langchain_huggingface import HuggingFaceEmbeddings  # Updated import
from langchain_groq import ChatGroq
from PyPDF2 import PdfReader
from huggingface_hub import login
from groq import AsyncGroq
from langchain.docstore.document import Document

# Load environment variables from a local .env file before reading keys.
load_dotenv()
HUGGING_API_KEY = os.getenv("HUGGING_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Fail fast at startup rather than at first API call.
if not HUGGING_API_KEY or not GROQ_API_KEY:
    raise ValueError("API keys for HuggingFace or Groq are missing.")

# Configure Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Authenticate with Hugging Face (for model downloads)
# NOTE: must happen before HuggingFaceEmbeddings below, which may download
# the sentence-transformers model on first use.
login(HUGGING_API_KEY)

# Load models and embeddings with a local embedding model.
# - embedding_model: local sentence-transformers encoder used by Chroma.
# - llm: LangChain Groq chat model (currently unused directly; kept for parity).
# - client: raw async Groq client used by the chat coroutines below.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)
client = AsyncGroq(api_key=GROQ_API_KEY)

# Global state for PDF vector store (single-user/session assumption —
# all users share the most recently uploaded PDF).
pdf_vector_store = None
current_pdf_path = None

# General Chat
async def chat_with_replit(message, history):
    """Answer a general chat message via the Groq chat API.

    Args:
        message: The new user message to answer.
        history: Prior turns, either as [user, assistant] pairs (this app's
            own State format) or as {"role": ..., "content": ...} dicts
            (Gradio "messages" format) — both are accepted.

    Returns:
        The assistant's reply text, or a fixed error string on failure.
    """
    try:
        messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
        for chat in history or []:
            if isinstance(chat, dict):
                # Messages-format entry: forward role/content as-is.
                messages.append({"role": chat.get("role", "user"), "content": chat.get("content", "")})
                continue
            user_msg, assistant_msg = chat
            messages.append({"role": "user", "content": user_msg})
            # Skip the "" placeholder of a still-pending turn — don't send
            # an empty assistant message to the model.
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})
        response = await client.chat.completions.create(
            messages=messages, model="llama3-70b-8192", temperature=0, max_tokens=1024, top_p=1, stream=False
        )
        return response.choices[0].message.content
    except Exception as e:
        logger.error(f"Chat error: {e}")
        return "Error in chat response."

def chat_with_replit_sync(message, history):
    """Synchronous wrapper so Gradio callbacks can call the async chat."""
    reply = asyncio.run(chat_with_replit(message, history))
    return reply

# ArXiv Chat
async def chat_with_replit_arxiv(message, history, doi_num):
    """Answer a question about an arXiv paper using retrieval over its text.

    Loads the paper via ArxivLoader, embeds its chunks into an in-memory
    Chroma store, retrieves the 3 chunks most similar to the question, and
    asks the Groq model to answer from that context.

    Args:
        message: The user's question.
        history: Prior [user, assistant] turns; included in the prompt
            (previously this parameter was silently ignored).
        doi_num: The arXiv identifier, e.g. "2502.02523".

    Returns:
        The assistant's reply text, or an explanatory/error string.
    """
    try:
        query = str(doi_num).strip()
        if not query:
            return "No documents found for the provided arXiv number."
        loader = ArxivLoader(query=query, load_max_docs=10)
        documents = loader.load_and_split()
        if not documents:
            return "No documents found for the provided arXiv number."
        metadata = documents[0].metadata
        vector_store = Chroma.from_documents(documents, embedding_model)
        results = vector_store.similarity_search(message, k=3)
        relevant_content = "\n\n".join(doc.page_content for doc in results)
        # System context first (conventional ordering), then prior turns,
        # then the new question.
        messages = [
            {"role": "system", "content": f"Answer based on this arXiv paper {doi_num}.\nMetadata: {metadata}.\nRelevant Content: {relevant_content}"}
        ]
        for user_msg, assistant_msg in history or []:
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})
        response = await client.chat.completions.create(
            messages=messages, model="llama3-70b-8192", temperature=0, max_tokens=1024, top_p=1, stream=False
        )
        return response.choices[0].message.content
    except Exception as e:
        logger.error(f"Error in chat with ArXiv PDF: {e}")
        return "Error processing chat with arXiv paper."

def chat_with_replit_arxiv_sync(message, history, doi_num):
    """Synchronous wrapper around the async arXiv chat for Gradio callbacks."""
    reply = asyncio.run(chat_with_replit_arxiv(message, history, doi_num))
    return reply

# Local PDF Chat
async def chat_with_replit_local_pdf(message, vector_store):
    """Answer a question grounded in the uploaded PDF's vector store.

    Retrieves the 3 chunks most similar to the question and asks the Groq
    model to answer from that context. Returns an instruction string when
    no vector store has been built yet.
    """
    if not vector_store:
        return "Please upload a PDF first and wait for processing to complete."
    try:
        matches = vector_store.similarity_search(message, k=3)
        snippets = [doc.page_content for doc in matches]
        context_text = "\n\n".join(snippets)
        prompt = [
            {"role": "user", "content": message},
            {"role": "system", "content": f"Answer based on the uploaded PDF.\nRelevant Content: {context_text}"},
        ]
        completion = await client.chat.completions.create(
            messages=prompt,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return completion.choices[0].message.content
    except Exception as err:
        logger.error(f"Error in chat with local PDF: {err}")
        return "Error processing chat with local PDF."

def process_pdf(pdf_file):
    """Extract text from an uploaded PDF and (re)build the global vector store.

    Skips reprocessing when the same path is uploaded again. Clearing the
    file in the UI fires the change event with None — handle that by
    resetting state instead of crashing inside PdfReader.

    Args:
        pdf_file: Filesystem path of the uploaded PDF (or None when cleared).

    Returns:
        A human-readable status string for the UI.
    """
    global pdf_vector_store, current_pdf_path
    try:
        if pdf_file is None:
            # File was removed from the upload widget; drop stale state so
            # the chat handler asks for a new upload.
            pdf_vector_store = None
            current_pdf_path = None
            return "Please upload a PDF file."
        if pdf_file != current_pdf_path:
            logger.info("Extracting text from PDF...")
            reader = PdfReader(pdf_file)
            # extract_text() may return None for image-only pages.
            text = "\n".join(page.extract_text() or "" for page in reader.pages)
            if not text.strip():
                return "Could not extract text from PDF."
            documents = [Document(page_content=text, metadata={"source": pdf_file})]
            logger.info("Creating vector store...")
            pdf_vector_store = Chroma.from_documents(documents, embedding_model)
            current_pdf_path = pdf_file
            return "PDF processed successfully. You can now ask questions."
        return "PDF already processed. Ask away!"
    except Exception as e:
        logger.error(f"Error processing PDF: {e}")
        return f"Error processing PDF: {str(e)}"

# Gradio UI
def _format_chat(history):
    """Render [[user, assistant], ...] pairs as a Markdown transcript."""
    return "\n\n".join(f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history)


def _queue_user_message(user_message, history):
    """Append the new user message with an empty pending reply.

    Returns the updated history for both the State and the Markdown output,
    so the user's message appears immediately while the model is working.
    """
    history = history or []
    history.append([user_message, ""])
    return history, _format_chat(history)


with gr.Blocks() as app:
    with gr.Tab(label="General Chat"):
        gr.Markdown("### Chat with the Assistant")
        with gr.Row():
            general_chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
            general_send_button = gr.Button("Send")
        general_chat_output = gr.Markdown(label="Chat Output")
        general_chat_history = gr.State([])

        def general_respond(history):
            """Fill in the assistant reply for the pending last turn."""
            user_message = history[-1][0]
            history[-1][1] = chat_with_replit_sync(user_message, history[:-1])
            return history, _format_chat(history)

        # Chain the two steps with .then(): two independent .click() events
        # on the same button have no ordering guarantee, so the responder
        # could previously run before the message was appended.
        general_send_button.click(
            _queue_user_message,
            inputs=[general_chat_input, general_chat_history],
            outputs=[general_chat_history, general_chat_output],
        ).then(
            general_respond,
            inputs=general_chat_history,
            outputs=[general_chat_history, general_chat_output],
        )

    with gr.Tab(label="Chat with ArXiv Paper"):
        gr.Markdown("### Ask Questions About an ArXiv Paper")
        with gr.Row():
            arxiv_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
            arxiv_doi = gr.Textbox(placeholder="Enter arXiv number, e.g. 2502.02523", label="ArXiv Number")
            arxiv_send_button = gr.Button("Send")
        arxiv_chat_output = gr.Markdown(label="Chat Output")
        arxiv_chat_history = gr.State([])

        def arxiv_respond(history, doi_num):
            """Fill in the assistant reply for the pending last turn."""
            user_message = history[-1][0]
            history[-1][1] = chat_with_replit_arxiv_sync(user_message, history[:-1], doi_num)
            return history, _format_chat(history)

        arxiv_send_button.click(
            _queue_user_message,
            inputs=[arxiv_input, arxiv_chat_history],
            outputs=[arxiv_chat_history, arxiv_chat_output],
        ).then(
            arxiv_respond,
            inputs=[arxiv_chat_history, arxiv_doi],
            outputs=[arxiv_chat_history, arxiv_chat_output],
        )

    with gr.Tab(label="Chat with Local PDF"):
        gr.Markdown("### Ask Questions About an Uploaded PDF")
        pdf_file_input = gr.File(label="Upload PDF file", file_types=[".pdf"])
        pdf_status = gr.Textbox(label="PDF Processing Status", interactive=False)
        with gr.Row():
            pdf_chat_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
            pdf_send_button = gr.Button("Send")
        pdf_chat_output = gr.Markdown(label="Chat Output")
        pdf_chat_history = gr.State([])

        def pdf_respond(history):
            """Fill in the assistant reply for the pending last turn."""
            user_message = history[-1][0]
            history[-1][1] = asyncio.run(chat_with_replit_local_pdf(user_message, pdf_vector_store))
            return history, _format_chat(history)

        pdf_file_input.change(process_pdf, inputs=pdf_file_input, outputs=pdf_status)
        pdf_send_button.click(
            _queue_user_message,
            inputs=[pdf_chat_input, pdf_chat_history],
            outputs=[pdf_chat_history, pdf_chat_output],
        ).then(
            pdf_respond,
            inputs=pdf_chat_history,
            outputs=[pdf_chat_history, pdf_chat_output],
        )

# Launch only when executed as a script, so the module can be imported
# (e.g. for testing) without starting a server.
if __name__ == "__main__":
    app.launch()