Spaces:
Running
Running
Refactor email processing: improve logging for email retrieval and remove commented-out code
Browse files
- app/controllers/mail.py +2 -37
app/controllers/mail.py
CHANGED
@@ -78,7 +78,7 @@ def list_emails(service, messages):
|
|
78 |
for message in messages:
|
79 |
msg = service.users().messages().get(userId="me", id=message["id"], format="full").execute()
|
80 |
metadata = {}
|
81 |
-
if
|
82 |
logger.info("Email already exists in the database.")
|
83 |
continue
|
84 |
for header in msg["payload"]["headers"]:
|
@@ -215,45 +215,10 @@ def collect(service, query=(datetime.today() - timedelta(days=10)).strftime("aft
|
|
215 |
# query = "subject:Re: Smartcareers algorithm debug and improvement'"
|
216 |
emails = search_emails(service, query)
|
217 |
if emails:
|
218 |
-
|
219 |
logger.info("Found %d emails after two_weeks_ago:\n", len(emails))
|
220 |
list_emails(service, emails)
|
221 |
logger.info("Listing emails...")
|
222 |
return f"{len(emails)} emails added to the collection."
|
223 |
else:
|
224 |
logger.info("No emails found after two weeks ago.")
|
225 |
-
|
226 |
-
|
227 |
-
# def get_documents(self):
|
228 |
-
# """
|
229 |
-
# Main function to list emails from the database.
|
230 |
-
|
231 |
-
# This function lists all emails stored in the database.
|
232 |
-
|
233 |
-
# Returns:
|
234 |
-
# None
|
235 |
-
# """
|
236 |
-
# data = vectorstore.get()
|
237 |
-
# df = pd.DataFrame(
|
238 |
-
# {"ids": data["ids"], "documents": data["documents"], "metadatas": data["metadatas"]}
|
239 |
-
# )
|
240 |
-
# df.to_excel("collection_data.xlsx", index=False)
|
241 |
-
# df = pd.concat(
|
242 |
-
# [df.drop("metadatas", axis=1), df["metadatas"].apply(pd.Series)], axis=1
|
243 |
-
# ).to_excel("collection_data_expand.xlsx", index=False)
|
244 |
-
|
245 |
-
|
246 |
-
# def get(self):
|
247 |
-
# """
|
248 |
-
# Main function to list emails from the database.
|
249 |
-
|
250 |
-
# This function lists all emails stored in the database.
|
251 |
-
|
252 |
-
# Returns:
|
253 |
-
# None
|
254 |
-
# """
|
255 |
-
# data = vectorstore.get()
|
256 |
-
# df = pd.DataFrame(
|
257 |
-
# {"id": data["ids"], "documents": data["documents"], "metadatas": data["metadatas"]}
|
258 |
-
# )
|
259 |
-
# return df.to_dict(orient="records")
|
|
|
78 |
for message in messages:
|
79 |
msg = service.users().messages().get(userId="me", id=message["id"], format="full").execute()
|
80 |
metadata = {}
|
81 |
+
if msg["id"] in vectorstore.index_to_docstore_id:
|
82 |
logger.info("Email already exists in the database.")
|
83 |
continue
|
84 |
for header in msg["payload"]["headers"]:
|
|
|
215 |
# query = "subject:Re: Smartcareers algorithm debug and improvement'"
|
216 |
emails = search_emails(service, query)
|
217 |
if emails:
|
218 |
+
logger.info("Found %d emails:\n", len(emails))
|
219 |
logger.info("Found %d emails after two_weeks_ago:\n", len(emails))
|
220 |
list_emails(service, emails)
|
221 |
logger.info("Listing emails...")
|
222 |
return f"{len(emails)} emails added to the collection."
|
223 |
else:
|
224 |
logger.info("No emails found after two weeks ago.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|