Upload app.py
app.py
CHANGED
@@ -5,7 +5,7 @@ import os
 import json
 from pathlib import Path
 
-from langchain.document_loaders import 
+from langchain.document_loaders import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
@@ -35,22 +35,24 @@ def preprocess_excel(file_path: str) -> pd.DataFrame:
     df.dropna(how='all', inplace=True)
     df.dropna(axis=1, how='all', inplace=True)
     df.reset_index(drop=True, inplace=True)
+    df.columns = df.columns.astype(str)
     return df
 
-def 
+def build_vectorstore_from_structured_records(df: pd.DataFrame):
     df.fillna("", inplace=True)
-
-
-
-
-
-
-
-
-
+    records = []
+    for i, row in df.iterrows():
+        item_class = str(row.get("Item Class", "")).strip()
+        job_done = str(row.get("Job Done", "")).strip()
+        backlog = str(row.get("Backlog?", "")).strip()
+        days = str(row.get("Days in Backlog", "")).strip()
+        if not any([item_class, job_done, backlog, days]):
+            continue
+        sentence = f"Item Class {item_class} has status {job_done}, is in {backlog} backlog, and has {days} days."
+        records.append(Document(page_content=sentence, metadata={"source": f"Row {i+1}"}))
 
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
-    split_docs = splitter.split_documents(
+    split_docs = splitter.split_documents(records)
 
     embeddings = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-MiniLM-l6-v2",
@@ -71,7 +73,7 @@ def create_qa_pipeline(vectorstore):
     return qa
 
 st.set_page_config(page_title="Excel-Aware RAG Chatbot", layout="wide")
-st.title("π Excel-Aware RAG Chatbot (
+st.title("π Excel-Aware RAG Chatbot (Structured QA)")
 
 with st.sidebar:
     uploaded_file = st.file_uploader("Upload your Excel file (.xlsx or .xlsm with 'Data Base' sheet)", type=["xlsx", "xlsm"])
@@ -91,8 +93,8 @@ if uploaded_file is not None:
         tmp_path = tmp_file.name
 
     try:
-
-        vectorstore = 
+        df = preprocess_excel(tmp_path)
+        vectorstore = build_vectorstore_from_structured_records(df)
         qa = create_qa_pipeline(vectorstore)
         st.success("β File processed and chatbot ready! Ask your questions below.")
     except Exception as e:
@@ -103,14 +105,14 @@ if uploaded_file is not None:
     for message in st.session_state.chat_history:
         st.chat_message(message["role"], avatar=USER_AVATAR if message["role"] == "user" else BOT_AVATAR).markdown(message["content"])
 
-    user_prompt = st.chat_input("Ask about 
+    user_prompt = st.chat_input("Ask about item classes, backlog, or status...")
 
     if user_prompt:
        st.session_state.chat_history.append({"role": "user", "content": user_prompt})
        st.chat_message("user", avatar=USER_AVATAR).markdown(user_prompt)
 
        with st.chat_message("assistant", avatar=BOT_AVATAR):
-            with st.spinner("
+            with st.spinner("Thinking..."):
                 try:
                     response = qa.invoke({"question": user_prompt})
                     final_response = response['answer']
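Read end to end, the helper introduced in the second hunk comes out roughly as below. This is a sketch assembled from the added lines, not the full file: the hunk stops at the HuggingFaceEmbeddings(...) call, so the FAISS.from_documents(...) tail and the return value are assumptions about the unshown remainder, and Document is imported here from langchain.docstore.document rather than from langchain.document_loaders as in the commit.

import pandas as pd
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

def build_vectorstore_from_structured_records(df: pd.DataFrame):
    # One natural-language sentence per spreadsheet row, so retrieval matches
    # whole records instead of isolated cells.
    df.fillna("", inplace=True)
    records = []
    for i, row in df.iterrows():
        item_class = str(row.get("Item Class", "")).strip()
        job_done = str(row.get("Job Done", "")).strip()
        backlog = str(row.get("Backlog?", "")).strip()
        days = str(row.get("Days in Backlog", "")).strip()
        if not any([item_class, job_done, backlog, days]):
            continue  # skip rows that are empty in every tracked column
        sentence = f"Item Class {item_class} has status {job_done}, is in {backlog} backlog, and has {days} days."
        records.append(Document(page_content=sentence, metadata={"source": f"Row {i+1}"}))

    # Row sentences are far shorter than 1000 characters, so most pass through the splitter intact.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    split_docs = splitter.split_documents(records)

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-l6-v2",
    )
    # Assumed tail (not visible in the hunk): index the sentences and return the store
    # that create_qa_pipeline() receives in the upload handler.
    return FAISS.from_documents(split_docs, embeddings)

Folding each row into a single sentence keeps a whole record inside one retrievable chunk, so retrieval returns complete rows rather than stray cells when the chatbot is asked about item classes, backlog, or status.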