Upload app.py
Browse files
app.py
CHANGED
@@ -40,10 +40,20 @@ def preprocess_excel(file_path: str) -> pd.DataFrame:
|
|
40 |
def build_vectorstore_from_dataframe(df: pd.DataFrame):
|
41 |
df.fillna("", inplace=True)
|
42 |
df['combined_text'] = df.apply(lambda row: ' | '.join([str(cell) for cell in row]), axis=1)
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
44 |
documents = docs_loader.load()
|
|
|
|
|
|
|
|
|
45 |
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
|
46 |
split_docs = splitter.split_documents(documents)
|
|
|
47 |
embeddings = HuggingFaceEmbeddings(
|
48 |
model_name="sentence-transformers/all-MiniLM-l6-v2",
|
49 |
model_kwargs={"device": "cpu"},
|
|
|
40 |
def build_vectorstore_from_dataframe(df: pd.DataFrame):
|
41 |
df.fillna("", inplace=True)
|
42 |
df['combined_text'] = df.apply(lambda row: ' | '.join([str(cell) for cell in row]), axis=1)
|
43 |
+
|
44 |
+
docs_loader = DataFrameLoader(
|
45 |
+
df[['combined_text']].rename(columns={"combined_text": "text"}),
|
46 |
+
page_content_column="text",
|
47 |
+
metadata_columns=["combined_text"]
|
48 |
+
)
|
49 |
documents = docs_loader.load()
|
50 |
+
|
51 |
+
for i, doc in enumerate(documents):
|
52 |
+
doc.metadata["source"] = f"Row {i+1}"
|
53 |
+
|
54 |
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
|
55 |
split_docs = splitter.split_documents(documents)
|
56 |
+
|
57 |
embeddings = HuggingFaceEmbeddings(
|
58 |
model_name="sentence-transformers/all-MiniLM-l6-v2",
|
59 |
model_kwargs={"device": "cpu"},
|