amiguel committed on
Commit
44f9878
·
verified ·
1 Parent(s): 623e43b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -22
app.py CHANGED
@@ -7,10 +7,14 @@ from langchain.document_loaders import DataFrameLoader
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain.embeddings import HuggingFaceEmbeddings
9
  from langchain.vectorstores import FAISS
10
- from langchain.chains import RetrievalQA
11
  from langchain import HuggingFacePipeline
12
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
13
 
 
 
 
 
14
  def preprocess_excel(file_path: str) -> pd.DataFrame:
15
  df_raw = pd.read_excel(file_path, sheet_name='Data Base', header=None)
16
  df = df_raw.iloc[4:].copy()
@@ -48,15 +52,20 @@ def create_qa_pipeline(vectorstore):
48
  llm = HuggingFacePipeline(pipeline=gen_pipeline)
49
 
50
  retriever = vectorstore.as_retriever()
51
- qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff", return_source_documents=False)
52
  return qa
53
 
 
54
  st.set_page_config(page_title="Excel-Aware RAG Chatbot", layout="wide")
55
  st.title("πŸ“Š Excel-Aware RAG Chatbot (Professional QA)")
56
 
57
  with st.sidebar:
58
  uploaded_file = st.file_uploader("Upload your Excel file (.xlsx or .xlsm with 'Data Base' sheet)", type=["xlsx", "xlsm"])
59
 
 
 
 
 
60
  if uploaded_file is not None:
61
  with st.spinner("Processing and indexing your Excel sheet..."):
62
  with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsm") as tmp_file:
@@ -68,28 +77,36 @@ if uploaded_file is not None:
68
  vectorstore = build_vectorstore_from_dataframe(cleaned_df)
69
  qa = create_qa_pipeline(vectorstore)
70
  st.success("βœ… File processed and chatbot ready! Ask your questions below.")
 
 
 
 
71
 
72
- if "chat_history" not in st.session_state:
73
- st.session_state.chat_history = []
 
74
 
75
- with st.chat_message("assistant"):
76
- st.markdown("How can I help you with the inspection data?")
77
 
78
- user_prompt = st.chat_input("Ask a question like 'How many backlog items are marked Yes?' or 'List overdue inspections'.")
 
 
79
 
80
- if user_prompt:
81
- st.chat_message("user").markdown(user_prompt)
82
- with st.chat_message("assistant"):
83
- with st.spinner("Thinking..."):
84
- try:
85
- answer = qa.run(user_prompt)
86
- st.markdown(f"**Answer:** {answer}")
87
- st.session_state.chat_history.append((user_prompt, answer))
88
- except Exception as e:
89
- st.error(f"Error: {e}")
90
- except Exception as e:
91
- st.error(f"Error processing file: {e}")
92
- finally:
93
- os.remove(tmp_path)
 
 
94
  else:
95
- st.info("Upload a file to get started.")
 
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain.embeddings import HuggingFaceEmbeddings
9
  from langchain.vectorstores import FAISS
10
+ from langchain.chains import RetrievalQAWithSourcesChain
11
  from langchain import HuggingFacePipeline
12
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
13
 
14
+ # Custom avatars
15
+ USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
16
+ BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
17
+
18
  def preprocess_excel(file_path: str) -> pd.DataFrame:
19
  df_raw = pd.read_excel(file_path, sheet_name='Data Base', header=None)
20
  df = df_raw.iloc[4:].copy()
 
52
  llm = HuggingFacePipeline(pipeline=gen_pipeline)
53
 
54
  retriever = vectorstore.as_retriever()
55
+ qa = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)
56
  return qa
57
 
58
+ # Streamlit app layout
59
  st.set_page_config(page_title="Excel-Aware RAG Chatbot", layout="wide")
60
  st.title("πŸ“Š Excel-Aware RAG Chatbot (Professional QA)")
61
 
62
  with st.sidebar:
63
  uploaded_file = st.file_uploader("Upload your Excel file (.xlsx or .xlsm with 'Data Base' sheet)", type=["xlsx", "xlsm"])
64
 
65
+ # Persistent chat history
66
+ if "chat_history" not in st.session_state:
67
+ st.session_state.chat_history = []
68
+
69
  if uploaded_file is not None:
70
  with st.spinner("Processing and indexing your Excel sheet..."):
71
  with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsm") as tmp_file:
 
77
  vectorstore = build_vectorstore_from_dataframe(cleaned_df)
78
  qa = create_qa_pipeline(vectorstore)
79
  st.success("βœ… File processed and chatbot ready! Ask your questions below.")
80
+ except Exception as e:
81
+ st.error(f"❌ Error processing file: {e}")
82
+ finally:
83
+ os.remove(tmp_path)
84
 
85
+ # Show previous messages
86
+ for message in st.session_state.chat_history:
87
+ st.chat_message(message["role"], avatar=USER_AVATAR if message["role"] == "user" else BOT_AVATAR).markdown(message["content"])
88
 
89
+ user_prompt = st.chat_input("Ask about inspections, delays, backlogs...")
 
90
 
91
+ if user_prompt:
92
+ st.session_state.chat_history.append({"role": "user", "content": user_prompt})
93
+ st.chat_message("user", avatar=USER_AVATAR).markdown(user_prompt)
94
 
95
+ with st.chat_message("assistant", avatar=BOT_AVATAR):
96
+ with st.spinner("Searching and generating..."):
97
+ try:
98
+ response = qa.run(user_prompt)
99
+ final_response = response['answer']
100
+ placeholder = st.empty()
101
+ streamed = ""
102
+
103
+ for word in final_response.split():
104
+ streamed += word + " "
105
+ placeholder.markdown(streamed + "β–Œ")
106
+
107
+ placeholder.markdown(f"**{final_response.strip()}**")
108
+ st.session_state.chat_history.append({"role": "assistant", "content": final_response})
109
+ except Exception as e:
110
+ st.error(f"❌ Error: {e}")
111
  else:
112
+ st.info("Upload a file on the left to get started.")