l-tran committed on
Commit
560a395
·
verified ·
1 Parent(s): aed4dc5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from sentence_transformers import SentenceTransformer, util
4
+ from groq import Groq
5
+ from PyPDF2 import PdfReader
6
+ from docx import Document
7
+ from pptx import Presentation
8
+
9
# Custom stylesheet for the page: dark body colors, the title / subheader /
# input-area / about-app / footer classes, and the Streamlit text-input widget.
_PAGE_CSS = """
<style>
body {
background-color: #121212;
color: #ffffff;
font-family: Arial, sans-serif;
}
.title {
font-size: 36px;
font-weight: bold;
color: #e67e22;
text-align: center;
margin-bottom: 20px;
}
.subheader {
font-size: 24px;
color: #f39c12;
margin-top: 10px;
text-align: center;
}
.input-area {
color: #ecf0f1;
font-size: 16px;
}
.about-app {
margin-top: 20px;
padding: 15px;
background-color: #1e1e1e;
border-radius: 8px;
color: #bdc3c7;
}
.footer {
background-color: #1c1c1c;
color: #bdc3c7;
font-size: 14px;
text-align: center;
padding: 10px;
position: fixed;
bottom: 0;
left: 0;
width: 100%;
z-index: 1000;
}
.stTextInput > div > div > input {
background-color: #2c3e50;
color: #ecf0f1;
font-size: 16px;
border-radius: 5px;
padding: 10px;
}
</style>
"""

# Raw HTML must be explicitly allowed for the <style> tag to be emitted.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
62
+
63
# Initialize retriever and Groq client.
# Streamlit re-executes this script on every interaction, so the embedding
# model and the API client are built inside cached factories instead of being
# re-created on each rerun.
@st.cache_resource
def _load_retriever():
    """Load the sentence-embedding model used for documents and queries."""
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


@st.cache_resource
def _make_groq_client(key):
    """Build a Groq client for the given API key."""
    return Groq(api_key=key)


retriever = _load_retriever()
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    # Report the missing key in the page instead of failing later with an
    # opaque client error, then halt this script run.
    st.error("GROQ_API_KEY environment variable is not set.")
    st.stop()

client = _make_groq_client(api_key)
68
+
69
# Seed knowledge base: a handful of statements about RAG. Uploaded documents
# are appended to this list further down the script.
documents = [
    (
        "Retrieval-Augmented Generation (RAG) is an AI framework that "
        "combines the strengths of retrieval-based and generative models."
    ),
    "The main components of a RAG system are the retriever and the generator.",
    (
        "A key benefit of Retrieval-Augmented Generation is that it can "
        "produce more accurate responses compared to standalone generative "
        "models."
    ),
    (
        "The retrieval process in a RAG system often relies on "
        "embedding-based models, like Sentence-BERT or DPR."
    ),
    (
        "Common use cases of RAG include chatbots, customer support systems, "
        "and knowledge retrieval for business intelligence."
    ),
]

# One embedding per document, as a tensor, for use by the semantic search.
document_embeddings = retriever.encode(documents, convert_to_tensor=True)
78
+
79
def retrieve(query, top_k=1):
    """Return the knowledge-base document matched first for ``query``.

    The query is embedded with the module-level ``retriever`` and compared
    against ``document_embeddings`` through ``util.semantic_search``.

    Args:
        query: Text to search the knowledge base for.
        top_k: Number of hits requested from the search. Only the first hit
            is returned to the caller.

    Returns:
        The text of the first hit, or ``None`` when the search yields no hits.
    """
    encoded_query = retriever.encode(query, convert_to_tensor=True)
    search_results = util.semantic_search(
        encoded_query, document_embeddings, top_k=top_k
    )
    # A single query was passed, so only the first result list is relevant.
    ranked = search_results[0]
    if not ranked:
        return None
    return documents[ranked[0]['corpus_id']]
84
+
85
def generate_response(query, context, model="gemma2-9b-it"):
    """Ask the Groq chat model to answer ``query`` using ``context``.

    Args:
        query: The user's question.
        context: Retrieved text placed ahead of the question in the prompt.
        model: Identifier of the chat model to call. Defaults to the model
            this app was written against, so existing callers are unaffected.

    Returns:
        The message content of the first choice in the completion response.
    """
    prompt = f"Context: {context} Question: {query} Answer:"
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    return response.choices[0].message.content
94
+
95
# Page header: the app title rendered with the custom "title" CSS class.
_TITLE_HTML = '<div class="title">DocumentsReader</div>'
st.markdown(_TITLE_HTML, unsafe_allow_html=True)

# Markdown shown inside the collapsible "About App" section.
_ABOUT_MARKDOWN = """
### About the App: Document-Based RAG Question Answering
This application, developed by **Hamaad Ayub Khan**, combines state-of-the-art **Retrieval-Augmented Generation (RAG)** technology with powerful AI models to answer questions based on the content of uploaded documents.
**Key Features:**
- Advanced Retrieval System
- Generative Answering Capability
- Multi-format Document Support
- Seamless Knowledge Base Update
- Contextually Rich Answers
**Developer Information:** Hamaad Ayub Khan created this application with a commitment to making information retrieval simple, accurate, and accessible.
**Social Links:**
- [GitHub](https://github.com/hakgs1234)
- [LinkedIn](https://linkedin.com/in/hamaadayubkhan)
"""

about_section = st.expander("About App")
with about_section:
    st.write(_ABOUT_MARKDOWN)
113
+
114
# Document upload and knowledge base update
_PDF_MIME = "application/pdf"
_DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
_PPTX_MIME = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
_TXT_MIME = "text/plain"


def _pdf_text(file_obj):
    """Return the text of every page of a PDF, joined by newlines."""
    # extract_text() is a method of page objects, not of PdfReader itself;
    # "or ''" guards against pages that yield no text.
    return "\n".join(
        page.extract_text() or "" for page in PdfReader(file_obj).pages
    )


def _docx_text(file_obj):
    """Return the paragraphs of a Word document, joined by newlines."""
    return "\n".join(para.text for para in Document(file_obj).paragraphs)


def _pptx_text(file_obj):
    """Return the text of every text-bearing shape on every slide."""
    return "\n".join(
        shape.text
        for slide in Presentation(file_obj).slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    )


def _plain_text(file_obj):
    """Decode a plain-text upload as UTF-8, replacing undecodable bytes."""
    return file_obj.read().decode("utf-8", errors="replace")


# Maps the MIME type reported for the upload to its text extractor.
_TEXT_EXTRACTORS = {
    _PDF_MIME: _pdf_text,
    _DOCX_MIME: _docx_text,
    _PPTX_MIME: _pptx_text,
    _TXT_MIME: _plain_text,
}

uploaded_file = st.file_uploader("Upload a document", type=["pdf", "docx", "pptx", "txt"])
if uploaded_file:
    extractor = _TEXT_EXTRACTORS.get(uploaded_file.type)
    if extractor is None:
        # Without this branch an unrecognised MIME type left file_text unbound.
        st.error(f"Unsupported file type: {uploaded_file.type}")
    else:
        file_text = extractor(uploaded_file)
        if file_text.strip():
            documents.append(file_text)
            # Re-encode the whole corpus so the new document is searchable.
            document_embeddings = retriever.encode(documents, convert_to_tensor=True)
            st.success("Document content successfully added to the knowledge base.")
        else:
            # Do not add an empty entry to the knowledge base.
            st.warning("No readable text was found in the uploaded document.")
129
+
130
# Question input and output handling
question = st.text_input("Enter your question:")

# Once a question is entered, look up context and render the answer. The
# answer is emitted after the input widget in the script.
if question:
    retrieved_context = retrieve(question)
    if retrieved_context:
        answer = generate_response(question, retrieved_context)
    else:
        # No context retrieved: reply with a fixed message instead of calling the model.
        answer = "I'm unable to find relevant information in the knowledge base."

    st.markdown("### Answer:")
    st.write(answer)