amiguel committed on
Commit
03bf821
·
verified ·
1 Parent(s): af7a4c4

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -1
app.py CHANGED
@@ -40,10 +40,20 @@ def preprocess_excel(file_path: str) -> pd.DataFrame:
40
  def build_vectorstore_from_dataframe(df: pd.DataFrame):
41
  df.fillna("", inplace=True)
42
  df['combined_text'] = df.apply(lambda row: ' | '.join([str(cell) for cell in row]), axis=1)
43
- docs_loader = DataFrameLoader(df[['combined_text']], page_content_column='combined_text')
 
 
 
 
 
44
  documents = docs_loader.load()
 
 
 
 
45
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
46
  split_docs = splitter.split_documents(documents)
 
47
  embeddings = HuggingFaceEmbeddings(
48
  model_name="sentence-transformers/all-MiniLM-l6-v2",
49
  model_kwargs={"device": "cpu"},
 
40
  def build_vectorstore_from_dataframe(df: pd.DataFrame):
41
  df.fillna("", inplace=True)
42
  df['combined_text'] = df.apply(lambda row: ' | '.join([str(cell) for cell in row]), axis=1)
43
+
44
+ docs_loader = DataFrameLoader(
45
+ df[['combined_text']].rename(columns={"combined_text": "text"}),
46
+ page_content_column="text",
47
+ metadata_columns=["combined_text"]
48
+ )
49
  documents = docs_loader.load()
50
+
51
+ for i, doc in enumerate(documents):
52
+ doc.metadata["source"] = f"Row {i+1}"
53
+
54
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
55
  split_docs = splitter.split_documents(documents)
56
+
57
  embeddings = HuggingFaceEmbeddings(
58
  model_name="sentence-transformers/all-MiniLM-l6-v2",
59
  model_kwargs={"device": "cpu"},