Spaces:
Running
Running
Enhance email processing: add check for existing emails, improve logging, and update requirements
Browse files
- app/controllers/mail.py +7 -4
- app/main.py +1 -1
- app/requirements.txt +1 -0
app/controllers/mail.py
CHANGED
@@ -68,9 +68,9 @@ def list_emails(service, messages):
|
|
68 |
- Deletes temporary files created during attachment processing.
|
69 |
|
70 |
Notes:
|
71 |
-
- The function assumes the existence of a global `service` object for Gmail API
|
72 |
- The `vectorstore.add_documents` method is used to store the processed documents.
|
73 |
-
- Attachments are temporarily saved in
|
74 |
- The function logs information about attachments being downloaded.
|
75 |
"""
|
76 |
ids = []
|
@@ -78,6 +78,9 @@ def list_emails(service, messages):
|
|
78 |
for message in messages:
|
79 |
msg = service.users().messages().get(userId="me", id=message["id"], format="full").execute()
|
80 |
metadata = {}
|
|
|
|
|
|
|
81 |
for header in msg["payload"]["headers"]:
|
82 |
if header["name"] == "From":
|
83 |
metadata["from"] = header["value"]
|
@@ -85,7 +88,7 @@ def list_emails(service, messages):
|
|
85 |
metadata["to"] = header["value"]
|
86 |
elif header["name"] == "Subject":
|
87 |
metadata["subject"] = header["value"]
|
88 |
-
|
89 |
elif header["name"] == "Cc":
|
90 |
metadata["cc"] = header["value"]
|
91 |
metadata["date"] = datetime.fromtimestamp(int(msg["internalDate"]) / 1000).strftime(
|
@@ -150,7 +153,7 @@ def list_emails(service, messages):
|
|
150 |
for event in calendar.events:
|
151 |
documents.append(
|
152 |
Document(
|
153 |
-
page_content=f"Event: {event.name}\
|
154 |
metadata={
|
155 |
"attachment": part["filename"],
|
156 |
"mimeType": part["mimeType"],
|
|
|
68 |
- Deletes temporary files created during attachment processing.
|
69 |
|
70 |
Notes:
|
71 |
+
- The function assumes the existence of a global `service` object for Gmail API.
|
72 |
- The `vectorstore.add_documents` method is used to store the processed documents.
|
73 |
+
- Attachments are temporarily saved in `ATTACHMENTS_DIR` and deleted after processing.
|
74 |
- The function logs information about attachments being downloaded.
|
75 |
"""
|
76 |
ids = []
|
|
|
78 |
for message in messages:
|
79 |
msg = service.users().messages().get(userId="me", id=message["id"], format="full").execute()
|
80 |
metadata = {}
|
81 |
+
if vectorstore.docstore.contains(msg["id"]):
|
82 |
+
logger.info("Email already exists in the database.")
|
83 |
+
continue
|
84 |
for header in msg["payload"]["headers"]:
|
85 |
if header["name"] == "From":
|
86 |
metadata["from"] = header["value"]
|
|
|
88 |
metadata["to"] = header["value"]
|
89 |
elif header["name"] == "Subject":
|
90 |
metadata["subject"] = header["value"]
|
91 |
+
logger.info("subject: %s", metadata["subject"])
|
92 |
elif header["name"] == "Cc":
|
93 |
metadata["cc"] = header["value"]
|
94 |
metadata["date"] = datetime.fromtimestamp(int(msg["internalDate"]) / 1000).strftime(
|
|
|
153 |
for event in calendar.events:
|
154 |
documents.append(
|
155 |
Document(
|
156 |
+
page_content=f"Event: {event.name}\n\Description: {event.description}\nStart: {event.begin}\nEnd: {event.end}",
|
157 |
metadata={
|
158 |
"attachment": part["filename"],
|
159 |
"mimeType": part["mimeType"],
|
app/main.py
CHANGED
@@ -60,7 +60,7 @@ class SessionMiddleware(BaseHTTPMiddleware):
|
|
60 |
|
61 |
logging.basicConfig(
|
62 |
format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
|
63 |
-
logging.getLogger().setLevel(logging.
|
64 |
|
65 |
app = FastAPI(docs_url="/")
|
66 |
|
|
|
60 |
|
61 |
logging.basicConfig(
|
62 |
format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
|
63 |
+
logging.getLogger().setLevel(logging.INFO)
|
64 |
|
65 |
app = FastAPI(docs_url="/")
|
66 |
|
app/requirements.txt
CHANGED
@@ -117,6 +117,7 @@ orjson==3.10.15
|
|
117 |
overrides==7.7.0
|
118 |
packaging==24.2
|
119 |
pandas==2.2.3
|
|
|
120 |
pdfminer.six==20250327
|
121 |
pi_heif==0.22.0
|
122 |
pillow==11.1.0
|
|
|
117 |
overrides==7.7.0
|
118 |
packaging==24.2
|
119 |
pandas==2.2.3
|
120 |
+
pdf2image==1.17.0
|
121 |
pdfminer.six==20250327
|
122 |
pi_heif==0.22.0
|
123 |
pillow==11.1.0
|