gavinzli committed on
Commit
0254a24
·
1 Parent(s): 9cec6f9

Enhance email processing: add check for existing emails, improve logging, and update requirements

Browse files
Files changed (3) hide show
  1. app/controllers/mail.py +7 -4
  2. app/main.py +1 -1
  3. app/requirements.txt +1 -0
app/controllers/mail.py CHANGED
@@ -68,9 +68,9 @@ def list_emails(service, messages):
68
  - Deletes temporary files created during attachment processing.
69
 
70
  Notes:
71
- - The function assumes the existence of a global `service` object for Gmail API interactions.
72
  - The `vectorstore.add_documents` method is used to store the processed documents.
73
- - Attachments are temporarily saved in a directory specified by `ATTACHMENTS_DIR` and deleted after processing.
74
  - The function logs information about attachments being downloaded.
75
  """
76
  ids = []
@@ -78,6 +78,9 @@ def list_emails(service, messages):
78
  for message in messages:
79
  msg = service.users().messages().get(userId="me", id=message["id"], format="full").execute()
80
  metadata = {}
 
 
 
81
  for header in msg["payload"]["headers"]:
82
  if header["name"] == "From":
83
  metadata["from"] = header["value"]
@@ -85,7 +88,7 @@ def list_emails(service, messages):
85
  metadata["to"] = header["value"]
86
  elif header["name"] == "Subject":
87
  metadata["subject"] = header["value"]
88
- print(f"subject: {metadata['subject']}")
89
  elif header["name"] == "Cc":
90
  metadata["cc"] = header["value"]
91
  metadata["date"] = datetime.fromtimestamp(int(msg["internalDate"]) / 1000).strftime(
@@ -150,7 +153,7 @@ def list_emails(service, messages):
150
  for event in calendar.events:
151
  documents.append(
152
  Document(
153
- page_content=f"Event: {event.name}\nDescription: {event.description}\nStart: {event.begin}\nEnd: {event.end}",
154
  metadata={
155
  "attachment": part["filename"],
156
  "mimeType": part["mimeType"],
 
68
  - Deletes temporary files created during attachment processing.
69
 
70
  Notes:
71
+ - Gmail API calls are made through the `service` object passed in as an argument.
72
  - The `vectorstore.add_documents` method is used to store the processed documents.
73
+ - Attachments are temporarily saved in `ATTACHMENTS_DIR` and deleted after processing.
74
  - The function logs information about attachments being downloaded.
75
  """
76
  ids = []
 
78
  for message in messages:
79
  msg = service.users().messages().get(userId="me", id=message["id"], format="full").execute()
80
  metadata = {}
81
+ if vectorstore.docstore.contains(msg["id"]):
82
+ logger.info("Email already exists in the database.")
83
+ continue
84
  for header in msg["payload"]["headers"]:
85
  if header["name"] == "From":
86
  metadata["from"] = header["value"]
 
88
  metadata["to"] = header["value"]
89
  elif header["name"] == "Subject":
90
  metadata["subject"] = header["value"]
91
+ logger.info("subject: %s", metadata["subject"])
92
  elif header["name"] == "Cc":
93
  metadata["cc"] = header["value"]
94
  metadata["date"] = datetime.fromtimestamp(int(msg["internalDate"]) / 1000).strftime(
 
153
  for event in calendar.events:
154
  documents.append(
155
  Document(
156
+ page_content=f"Event: {event.name}\nDescription: {event.description}\nStart: {event.begin}\nEnd: {event.end}",
157
  metadata={
158
  "attachment": part["filename"],
159
  "mimeType": part["mimeType"],
app/main.py CHANGED
@@ -60,7 +60,7 @@ class SessionMiddleware(BaseHTTPMiddleware):
60
 
61
  logging.basicConfig(
62
  format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
63
- logging.getLogger().setLevel(logging.ERROR)
64
 
65
  app = FastAPI(docs_url="/")
66
 
 
60
 
61
  logging.basicConfig(
62
  format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
63
+ logging.getLogger().setLevel(logging.INFO)
64
 
65
  app = FastAPI(docs_url="/")
66
 
app/requirements.txt CHANGED
@@ -117,6 +117,7 @@ orjson==3.10.15
117
  overrides==7.7.0
118
  packaging==24.2
119
  pandas==2.2.3
 
120
  pdfminer.six==20250327
121
  pi_heif==0.22.0
122
  pillow==11.1.0
 
117
  overrides==7.7.0
118
  packaging==24.2
119
  pandas==2.2.3
120
+ pdf2image==1.17.0
121
  pdfminer.six==20250327
122
  pi_heif==0.22.0
123
  pillow==11.1.0