danielle2003 commited on
Commit
c1c7ce6
·
verified ·
1 Parent(s): 725149f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dependencies for the PDF question-answering Streamlit app.
import os

import google.generativeai as genai
import streamlit as st
from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader

# Read GOOGLE_API_KEY from a local .env file and register it with the
# Gemini SDK before any model or embedding calls are made.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
13
+
14
def get_pdf_text(pdf_docs):
    """Extract the plain text of every page of an uploaded PDF.

    Args:
        pdf_docs: A file-like object readable by PyPDF2's ``PdfReader``
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        str: The concatenated text of all pages. Pages with no
        extractable text layer contribute an empty string instead of
        breaking the concatenation.
    """
    pdf_reader = PdfReader(pdf_docs)
    # extract_text() can yield None/empty for scanned or image-only
    # pages (PyPDF2-version dependent); `or ""` guards the join.
    # "".join also avoids quadratic string concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
20
+
21
def get_text_chunks(text):
    """Split raw document text into overlapping chunks for embedding.

    Args:
        text: The full document text as a single string.

    Returns:
        list[str]: Chunks of at most 10,000 characters with a
        1,000-character overlap between consecutive chunks, so context
        is not lost at chunk boundaries.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    return splitter.split_text(text)
25
+
26
def get_vector_store(text_chunks):
    """Embed text chunks with Gemini embeddings and persist a FAISS index.

    The index is saved to the local directory ``"faiss_index"`` so later
    questions can be answered without re-processing the PDF.

    Args:
        text_chunks: Iterable of text chunks to embed.
    """
    from langchain_google_genai import GoogleGenerativeAIEmbeddings

    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    store = FAISS.from_texts(text_chunks, embedder)
    store.save_local("faiss_index")
31
+
32
def get_conversational_chain():
    """Build a "stuff"-type QA chain backed by the Gemini chat model.

    Returns:
        A ``load_qa_chain`` chain that answers a question from the
        supplied context documents and is instructed to say the answer
        is unavailable rather than fabricate one.
    """
    from langchain_google_genai import ChatGoogleGenerativeAI

    prompt_template = """
    Answer the question as detailed as possible from the provided context and make sure to provide all the details.
    If the answer is not present in the provided context, just say "Answer is not available in context". Do not provide
    the wrong answer.
    Context:\n{context}?\n
    Question:\n{question}\n

    Answer:
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
47
+
48
def user_input(user_question):
    """Answer a user question against the locally saved FAISS index.

    Loads the persisted index, retrieves the chunks most similar to the
    question, runs them through the QA chain, and writes the model's
    reply to the Streamlit page.

    Args:
        user_question: The question typed by the user.
    """
    from langchain_google_genai import GoogleGenerativeAIEmbeddings

    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # NOTE(review): allow_dangerous_deserialization unpickles the index
    # file — acceptable only because "faiss_index" is produced by this
    # same app; never point this at an untrusted path.
    index = FAISS.load_local(
        "faiss_index", embedder, allow_dangerous_deserialization=True
    )
    matches = index.similarity_search(user_question)
    qa_chain = get_conversational_chain()
    result = qa_chain(
        {"input_documents": matches, "question": user_question},
        return_only_outputs=True,
    )
    st.write("Reply:", result["output_text"])
56
+
57
def main():
    """Render the Streamlit UI: a question box plus a PDF-upload sidebar."""
    st.set_page_config(page_title="Chat with PDF")
    st.header("Chat with PDF using Gemini AI")

    user_question = st.text_input("Ask a question about the PDF file")
    if user_question:
        user_input(user_question)

    with st.sidebar:
        st.title("Menu")
        pdf_docs = st.file_uploader("Upload your PDF file here")
        if st.button("Submit & Process"):
            # Guard clause: refuse to process until a file is uploaded.
            if not pdf_docs:
                st.error("Please upload a PDF file")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    chunks = get_text_chunks(raw_text)
                    get_vector_store(chunks)
                    st.success("Processing complete")


if __name__ == "__main__":
    main()