amiguel committed
Commit d7c0178 · verified
1 Parent(s): 2bdefb3

Upload app.py

Files changed (1)
  1. app.py +19 -17
app.py CHANGED
@@ -5,7 +5,7 @@ import os
 import json
 from pathlib import Path
 
-from langchain.document_loaders import DataFrameLoader
+from langchain.document_loaders import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
@@ -35,22 +35,24 @@ def preprocess_excel(file_path: str) -> pd.DataFrame:
     df.dropna(how='all', inplace=True)
     df.dropna(axis=1, how='all', inplace=True)
     df.reset_index(drop=True, inplace=True)
+    df.columns = df.columns.astype(str)
     return df
 
-def build_vectorstore_from_dataframe(df: pd.DataFrame):
+def build_vectorstore_from_structured_records(df: pd.DataFrame):
     df.fillna("", inplace=True)
-    df['combined_text'] = df.apply(lambda row: ' | '.join([str(cell) for cell in row]), axis=1)
-
-
-    docs_loader = DataFrameLoader(df[['combined_text']], page_content_column='combined_text')
-    documents = docs_loader.load()
-
-    for i, doc in enumerate(documents):
-        doc.metadata["source"] = f"Row {i+1}"
-
+    records = []
+    for i, row in df.iterrows():
+        item_class = str(row.get("Item Class", "")).strip()
+        job_done = str(row.get("Job Done", "")).strip()
+        backlog = str(row.get("Backlog?", "")).strip()
+        days = str(row.get("Days in Backlog", "")).strip()
+        if not any([item_class, job_done, backlog, days]):
+            continue
+        sentence = f"Item Class {item_class} has status {job_done}, is in {backlog} backlog, and has {days} days."
+        records.append(Document(page_content=sentence, metadata={"source": f"Row {i+1}"}))
 
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
-    split_docs = splitter.split_documents(documents)
+    split_docs = splitter.split_documents(records)
 
     embeddings = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-MiniLM-l6-v2",
@@ -71,7 +73,7 @@ def create_qa_pipeline(vectorstore):
     return qa
 
 st.set_page_config(page_title="Excel-Aware RAG Chatbot", layout="wide")
-st.title("📊 Excel-Aware RAG Chatbot (Professional QA)")
+st.title("📊 Excel-Aware RAG Chatbot (Structured QA)")
 
 with st.sidebar:
     uploaded_file = st.file_uploader("Upload your Excel file (.xlsx or .xlsm with 'Data Base' sheet)", type=["xlsx", "xlsm"])
@@ -91,8 +93,8 @@ if uploaded_file is not None:
         tmp_path = tmp_file.name
 
     try:
-        cleaned_df = preprocess_excel(tmp_path)
-        vectorstore = build_vectorstore_from_dataframe(cleaned_df)
+        df = preprocess_excel(tmp_path)
+        vectorstore = build_vectorstore_from_structured_records(df)
         qa = create_qa_pipeline(vectorstore)
         st.success("✅ File processed and chatbot ready! Ask your questions below.")
     except Exception as e:
@@ -103,14 +105,14 @@ if uploaded_file is not None:
     for message in st.session_state.chat_history:
         st.chat_message(message["role"], avatar=USER_AVATAR if message["role"] == "user" else BOT_AVATAR).markdown(message["content"])
 
-    user_prompt = st.chat_input("Ask about inspections, delays, backlog...")
+    user_prompt = st.chat_input("Ask about item classes, backlog, or status...")
 
     if user_prompt:
         st.session_state.chat_history.append({"role": "user", "content": user_prompt})
         st.chat_message("user", avatar=USER_AVATAR).markdown(user_prompt)
 
         with st.chat_message("assistant", avatar=BOT_AVATAR):
             with st.spinner("Thinking..."):
-            with st.spinner("Searching and generating..."):
+            with st.spinner("Thinking..."):
                 try:
                     response = qa.invoke({"question": user_prompt})
                     final_response = response['answer']
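
For reference, a minimal standalone sketch of the indexing flow this commit introduces: each spreadsheet row is rendered as one natural-language sentence, wrapped in a Document, split, embedded, and stored in FAISS. The column names ("Item Class", "Job Done", "Backlog?", "Days in Backlog") come from the diff; the two-row sample DataFrame, the langchain.schema import path (the commit itself imports Document from langchain.document_loaders), and the canonical model id spelling are assumptions for illustration, not part of the committed file.

# Sketch of the row-to-sentence indexing flow (assumed imports and sample data).
import pandas as pd
from langchain.schema import Document  # assumed path; app.py imports Document from langchain.document_loaders
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Hypothetical sample mirroring the 'Data Base' sheet columns referenced in the diff.
df = pd.DataFrame({
    "Item Class": ["Piping", "Valve"],
    "Job Done": ["Yes", "No"],
    "Backlog?": ["No", "Yes"],
    "Days in Backlog": ["0", "45"],
})

# One sentence per row, tagged with its source row, as in build_vectorstore_from_structured_records.
records = []
for i, row in df.iterrows():
    sentence = (
        f"Item Class {row['Item Class']} has status {row['Job Done']}, "
        f"is in {row['Backlog?']} backlog, and has {row['Days in Backlog']} days."
    )
    records.append(Document(page_content=sentence, metadata={"source": f"Row {i + 1}"}))

# Same splitter settings as app.py; canonical spelling of the MiniLM model id used here.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
split_docs = splitter.split_documents(records)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(split_docs, embeddings)

# Retrieval check: print the indexed sentence most similar to a backlog question.
print(vectorstore.similarity_search("Which item classes are in backlog?", k=1)[0].page_content)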