gavinzli committed on
Commit
db83efb
·
1 Parent(s): ea7d339

Implement OAuth 2.0 authentication flow and session management; remove token.pickle file

Browse files
app/controllers/mail.py CHANGED
@@ -6,7 +6,7 @@ from datetime import datetime, timedelta
6
  from venv import logger
7
  from ics import Calendar
8
 
9
- import pandas as pd
10
  from langchain_core.documents import Document
11
  from langchain_community.document_loaders import (
12
  PyPDFLoader,
@@ -16,7 +16,7 @@ from langchain_community.document_loaders import (
16
  )
17
 
18
  from models.db import vectorstore
19
- from models.mails import build_gmail_service
20
 
21
  SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]
22
  EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
@@ -24,10 +24,8 @@ EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
24
  ATTACHMENTS_DIR = "cache"
25
  os.makedirs(ATTACHMENTS_DIR, exist_ok=True)
26
 
27
- service = build_gmail_service()
28
-
29
-
30
- def search_emails(query):
31
  """Search emails based on a query."""
32
  result = service.users().messages().list(userId="me", q=query).execute()
33
  messages = []
@@ -43,13 +41,13 @@ def search_emails(query):
43
  return messages
44
 
45
 
46
- def list_emails(messages):
47
  """
48
  Processes a list of email messages, extracts metadata, decodes content, and handles attachments.
49
 
50
  Args:
51
  messages (list): A list of email message dictionaries, where each dictionary contains
52
- at least an 'id' key representing the email's unique identifier.
53
 
54
  Returns:
55
  None: The function processes the emails and adds the extracted documents to a vector store.
@@ -94,7 +92,7 @@ def list_emails(messages):
94
  )
95
  metadata["user_id"] = service.users().getProfile(userId="me").execute().get("emailAddress")
96
  metadata["msg_id"] = msg["id"]
97
- print(metadata, msg["payload"]["mimeType"])
98
  ids = []
99
  documents = []
100
  mime_types = []
@@ -199,7 +197,7 @@ def list_emails(messages):
199
  vectorstore.add_documents(documents=documents, ids=ids)
200
 
201
 
202
- def collect(query=(datetime.today() - timedelta(days=21)).strftime("after:%Y/%m/%d")):
203
  """
204
  Main function to search and list emails from Gmail.
205
 
@@ -211,47 +209,47 @@ def collect(query=(datetime.today() - timedelta(days=21)).strftime("after:%Y/%m/
211
  None
212
  """
213
  query = "subject:Re: Smartcareers algorithm debug and improvement'"
214
- emails = search_emails(query)
215
  if emails:
216
  print("Found %d emails:\n", len(emails))
217
  logger.info("Found %d emails after two_weeks_ago:\n", len(emails))
218
- list_emails(emails)
219
  logger.info("Listing emails...")
220
  return f"{len(emails)} emails added to the collection."
221
  else:
222
  logger.info("No emails found after two weeks ago.")
223
 
224
 
225
- def get_documents():
226
- """
227
- Main function to list emails from the database.
228
 
229
- This function lists all emails stored in the database.
230
 
231
- Returns:
232
- None
233
- """
234
- data = vectorstore.get()
235
- df = pd.DataFrame(
236
- {"ids": data["ids"], "documents": data["documents"], "metadatas": data["metadatas"]}
237
- )
238
- df.to_excel("collection_data.xlsx", index=False)
239
- df = pd.concat(
240
- [df.drop("metadatas", axis=1), df["metadatas"].apply(pd.Series)], axis=1
241
- ).to_excel("collection_data_expand.xlsx", index=False)
242
 
243
 
244
- def get():
245
- """
246
- Main function to list emails from the database.
247
 
248
- This function lists all emails stored in the database.
249
 
250
- Returns:
251
- None
252
- """
253
- data = vectorstore.get()
254
- df = pd.DataFrame(
255
- {"id": data["ids"], "documents": data["documents"], "metadatas": data["metadatas"]}
256
- )
257
- return df.to_dict(orient="records")
 
6
  from venv import logger
7
  from ics import Calendar
8
 
9
+ # import pandas as pd
10
  from langchain_core.documents import Document
11
  from langchain_community.document_loaders import (
12
  PyPDFLoader,
 
16
  )
17
 
18
  from models.db import vectorstore
19
+ # from models.mails import build_gmail_service
20
 
21
  SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]
22
  EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
 
24
  ATTACHMENTS_DIR = "cache"
25
  os.makedirs(ATTACHMENTS_DIR, exist_ok=True)
26
 
27
+ # service = build_gmail_service()
28
+ def search_emails(service, query):
 
 
29
  """Search emails based on a query."""
30
  result = service.users().messages().list(userId="me", q=query).execute()
31
  messages = []
 
41
  return messages
42
 
43
 
44
+ def list_emails(service, messages):
45
  """
46
  Processes a list of email messages, extracts metadata, decodes content, and handles attachments.
47
 
48
  Args:
49
  messages (list): A list of email message dictionaries, where each dictionary contains
50
+ at least an 'id' key representing the email's unique identifier.
51
 
52
  Returns:
53
  None: The function processes the emails and adds the extracted documents to a vector store.
 
92
  )
93
  metadata["user_id"] = service.users().getProfile(userId="me").execute().get("emailAddress")
94
  metadata["msg_id"] = msg["id"]
95
+ # print(metadata, msg["payload"]["mimeType"])
96
  ids = []
97
  documents = []
98
  mime_types = []
 
197
  vectorstore.add_documents(documents=documents, ids=ids)
198
 
199
 
200
def collect(service, query=None):
    """
    Search Gmail for messages matching ``query`` and index the results.

    Args:
        service: Gmail API client; passed unchanged to ``search_emails`` and
            ``list_emails``.
        query (str | None): Gmail search expression. When omitted, the search
            covers the last 21 days ("after:YYYY/MM/DD"), computed at call
            time rather than once at import time.

    Returns:
        str | None: "<n> emails added to the collection." when at least one
        message matched, otherwise None.
    """
    if query is None:
        # Build the default per call: a default evaluated in the signature is
        # frozen at import and drifts further out of date the longer the
        # process runs.
        query = (datetime.today() - timedelta(days=21)).strftime("after:%Y/%m/%d")

    emails = search_emails(service, query)
    if not emails:
        logger.info("No emails found for query %r.", query)
        return None

    logger.info("Found %d emails for query %r.", len(emails), query)
    logger.info("Listing emails...")
    list_emails(service, emails)
    return f"{len(emails)} emails added to the collection."
221
 
222
 
223
+ # def get_documents(self):
224
+ # """
225
+ # Main function to list emails from the database.
226
 
227
+ # This function lists all emails stored in the database.
228
 
229
+ # Returns:
230
+ # None
231
+ # """
232
+ # data = vectorstore.get()
233
+ # df = pd.DataFrame(
234
+ # {"ids": data["ids"], "documents": data["documents"], "metadatas": data["metadatas"]}
235
+ # )
236
+ # df.to_excel("collection_data.xlsx", index=False)
237
+ # df = pd.concat(
238
+ # [df.drop("metadatas", axis=1), df["metadatas"].apply(pd.Series)], axis=1
239
+ # ).to_excel("collection_data_expand.xlsx", index=False)
240
 
241
 
242
+ # def get(self):
243
+ # """
244
+ # Main function to list emails from the database.
245
 
246
+ # This function lists all emails stored in the database.
247
 
248
+ # Returns:
249
+ # None
250
+ # """
251
+ # data = vectorstore.get()
252
+ # df = pd.DataFrame(
253
+ # {"id": data["ids"], "documents": data["documents"], "metadatas": data["metadatas"]}
254
+ # )
255
+ # return df.to_dict(orient="records")
app/main.py CHANGED
@@ -1,14 +1,72 @@
1
  """Module to handle the main FastAPI application and its endpoints."""
2
  import logging
3
- from fastapi import FastAPI
 
4
  from fastapi.middleware.cors import CORSMiddleware
5
- from router import content, mail
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  app = FastAPI(docs_url="/")
9
 
10
  app.include_router(content.router)
11
  app.include_router(mail.router)
 
12
 
13
  origins = [
14
  "*"
@@ -21,11 +79,7 @@ app.add_middleware(
21
  allow_methods=["*"],
22
  allow_headers=["*"],
23
  )
24
-
25
- logging.basicConfig(
26
- format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
27
- logging.getLogger().setLevel(logging.ERROR)
28
-
29
 
30
  @app.get("/_health")
31
  def health():
 
1
  """Module to handle the main FastAPI application and its endpoints."""
2
import logging
import os

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from jose import jwt
from router import auth, content, mail
from starlette.middleware.base import BaseHTTPMiddleware
9
+
10
# Key used to sign the session JWT. It is read from the environment so a
# deployment does not have to sign cookies with the placeholder committed to
# the repository; the literal fallback only keeps existing local setups working.
SECRET_KEY = os.environ.get("SESSION_SECRET_KEY", "your-secret-key")
# HS256 is a symmetric (HMAC) scheme: whoever knows SECRET_KEY can forge sessions.
ALGORITHM = "HS256"
12
+
13
class SessionMiddleware(BaseHTTPMiddleware):
    """
    Middleware that keeps per-client session data in a signed JWT cookie.

    For every request the "session_token" cookie is read. When it is present
    and decodes with the module-level SECRET_KEY / ALGORITHM, its claims become
    ``request.state.session``; a missing or undecodable token yields an empty
    dict instead of an error. After the downstream handler has run, the
    (possibly mutated) session dict is encoded again and written back as the
    "session_token" cookie.

    Cookie Parameters:
        session_token (str): JWT holding the session data. HTTP-only, maximum
            age 3600 seconds (1 hour), SameSite=Lax, and marked Secure when the
            request itself arrived over HTTPS.

    Note:
        A JWT is signed, not encrypted, so anything stored in the session can
        be read by whoever holds the cookie.
    """

    async def dispatch(self, request: Request, call_next):
        """
        Attach the decoded session to the request and refresh the cookie.

        Args:
            request: Incoming request; ``request.state.session`` is set here.
            call_next: Callable that forwards the request down the stack.

        Returns:
            The downstream response with an updated "session_token" cookie.
        """
        session_data = {}
        session_token = request.cookies.get("session_token")
        if session_token:
            try:
                session_data = jwt.decode(session_token, SECRET_KEY, algorithms=[ALGORITHM])
            except jwt.JWTError:
                # Tampered, expired or otherwise invalid token: start a fresh
                # session rather than failing the request.
                session_data = {}
        request.state.session = session_data

        response = await call_next(request)

        session_token = jwt.encode(request.state.session, SECRET_KEY, algorithm=ALGORITHM)
        response.set_cookie(
            key="session_token",
            value=session_token,
            httponly=True,
            max_age=3600,
            samesite="lax",
            # Never downgrade an HTTPS session to a cookie that may also be
            # sent over plain HTTP; plain-HTTP development setups still work.
            secure=request.url.scheme == "https",
        )
        return response
60
+
61
+ logging.basicConfig(
62
+ format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
63
+ logging.getLogger().setLevel(logging.ERROR)
64
 
65
  app = FastAPI(docs_url="/")
66
 
67
  app.include_router(content.router)
68
  app.include_router(mail.router)
69
+ app.include_router(auth.router)
70
 
71
  origins = [
72
  "*"
 
79
  allow_methods=["*"],
80
  allow_headers=["*"],
81
  )
82
+ app.add_middleware(SessionMiddleware)
 
 
 
 
83
 
84
  @app.get("/_health")
85
  def health():
app/router/auth.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Module for defining the main routes of the API."""
2
+ import os
3
+ import json
4
+ import pickle
5
+ from fastapi import APIRouter, Request
6
+ from fastapi.responses import JSONResponse
7
+ from google_auth_oauthlib.flow import InstalledAppFlow
8
+ from googleapiclient.discovery import build
9
+
10
+ router = APIRouter(tags=["auth"])
11
+
12
+ CLIENT_ID = os.environ.get("CLIENT_ID")
13
+ CLIENT_SECRET = os.environ.get("CLIENT_SECRET")
14
+ REDIRECT_URI = os.environ.get("REDIRECT_URI")
15
+
16
+ SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]
17
+
18
+ # Client config for OAuth flow
19
+ CLIENT_CONFIG = {
20
+ "web": {
21
+ "client_id": CLIENT_ID,
22
+ "client_secret": CLIENT_SECRET,
23
+ "redirect_uris": [REDIRECT_URI],
24
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
25
+ "token_uri": "https://oauth2.googleapis.com/token",
26
+ }
27
+ }
28
+
29
@router.get("/auth/google/url")
async def get_auth_url():
    """
    Build the Google OAuth 2.0 authorization URL for this application.

    A flow is created from CLIENT_CONFIG and SCOPES, pointed at REDIRECT_URI,
    and asked for an authorization URL requesting offline access with a forced
    consent prompt.

    Returns:
        JSONResponse: JSON body with the authorization URL under the key "url".
    """
    oauth_flow = InstalledAppFlow.from_client_config(CLIENT_CONFIG, SCOPES)
    oauth_flow.redirect_uri = REDIRECT_URI
    # authorization_url yields a pair; only its first item (the URL) is used.
    url_pair = oauth_flow.authorization_url(access_type="offline", prompt="consent")
    payload = {"url": url_pair[0]}
    return JSONResponse(payload)
45
+
46
@router.get("/auth/google/callback")
async def google_callback(code: str, request: Request):
    """
    Complete the Google OAuth 2.0 flow for the authorization ``code``.

    Exchanges the code for credentials, stores their JSON form in the session
    under "credential", fetches the Gmail profile of the authorised account and
    pickles the credentials to "<emailAddress>.pickle" in the working directory.

    Args:
        code: Authorization code supplied as a query parameter.
        request: Incoming request; ``request.state.session`` must already be a
            dict (the session middleware in app/main.py sets it).

    Returns:
        JSONResponse: The profile returned by ``users().getProfile``.
    """
    import logging

    from fastapi.concurrency import run_in_threadpool

    flow = InstalledAppFlow.from_client_config(CLIENT_CONFIG, SCOPES)
    flow.redirect_uri = REDIRECT_URI
    # fetch_token and execute() perform synchronous HTTP requests; running them
    # in the thread pool keeps this coroutine from stalling the event loop.
    await run_in_threadpool(flow.fetch_token, code=code)
    credentials = flow.credentials
    request.state.session["credential"] = json.loads(credentials.to_json())

    service = build("gmail", "v1", credentials=credentials)
    profile = await run_in_threadpool(
        service.users().getProfile(userId="me").execute
    )
    logging.getLogger(__name__).debug("Authorised Gmail profile: %s", profile)

    # NOTE(review): the pickled credentials include the refresh token and are
    # written next to the application code, keyed only by e-mail address;
    # the /mail route later unpickles this file. Consider a protected store.
    with open(f"{profile['emailAddress']}.pickle", "wb") as token:
        pickle.dump(credentials, token)
    return JSONResponse(profile)
app/router/mail.py CHANGED
@@ -1,12 +1,17 @@
1
  """Module for defining the main routes of the API."""
2
- from fastapi import APIRouter
 
 
3
  from fastapi.responses import JSONResponse
 
4
  from controllers import mail
 
 
5
 
6
  router = APIRouter(prefix="/mail", tags=["mail"])
7
 
8
  @router.post("")
9
- def collect():
10
  """
11
  Handles the chat POST request.
12
 
@@ -16,19 +21,34 @@ def collect():
16
  Returns:
17
  str: The generated response from the chat function.
18
  """
19
- mail.collect()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  return JSONResponse(content={"message": "Mail collected successfully."})
21
 
22
- @router.get("")
23
- def get():
24
- """
25
- Handles the chat POST request.
26
 
27
- Args:
28
- query (ReqData): The request data containing the query parameters.
29
 
30
- Returns:
31
- str: The generated response from the chat function.
32
- """
33
- result = mail.get()
34
- return JSONResponse(content= result)
 
1
  """Module for defining the main routes of the API."""
2
+ import os
3
+ import pickle
4
+ from fastapi import APIRouter, Request
5
  from fastapi.responses import JSONResponse
6
+
7
  from controllers import mail
8
+ from google.oauth2.credentials import Credentials
9
+ from googleapiclient.discovery import build
10
 
11
  router = APIRouter(prefix="/mail", tags=["mail"])
12
 
13
@router.post("")
def collect(emailAddress: str, request: Request):
    """
    Run the mail collection for a Gmail account.

    Credentials are loaded from "<emailAddress>.pickle" when that file exists;
    otherwise they are rebuilt from the "credential" entry of the request's
    session. A Gmail client built from them is handed to ``mail.collect``.

    Args:
        emailAddress: Address whose stored credential file should be used.
        request: Incoming request carrying ``request.state.session``.

    Returns:
        JSONResponse: {"message": "Mail collected successfully."}

    Raises:
        HTTPException: 400 when ``emailAddress`` is empty or is not a plain
            file-name component; 401 when neither a stored credential file nor
            session credentials are available.
    """
    from fastapi import HTTPException

    token_path = f"{emailAddress}.pickle"
    # emailAddress comes straight from the query string: refuse anything that
    # would make the credential path leave the working directory.
    if not emailAddress or os.path.basename(token_path) != token_path:
        raise HTTPException(status_code=400, detail="Invalid emailAddress.")

    if os.path.exists(token_path):
        # NOTE(review): unpickling runs arbitrary code if this file can be
        # written by someone else, and any caller who knows an address reuses
        # that account's stored credentials. Consider tying this to the session.
        with open(token_path, "rb") as token:
            credentials = pickle.load(token)
    else:
        cred_dict = request.state.session.get("credential")
        if not cred_dict:
            raise HTTPException(
                status_code=401,
                detail="No credentials available; authenticate first.",
            )
        credentials = Credentials(
            token=cred_dict["token"],
            # The serialised credentials may not carry a refresh token.
            refresh_token=cred_dict.get("refresh_token"),
            token_uri=cred_dict["token_uri"],
            client_id=cred_dict["client_id"],
            client_secret=cred_dict["client_secret"],
            scopes=cred_dict["scopes"],
        )

    mailservice = build("gmail", "v1", credentials=credentials)
    mail.collect(mailservice)
    return JSONResponse(content={"message": "Mail collected successfully."})
41
 
42
+ # @router.get("")
43
+ # def get():
44
+ # """
45
+ # Handles the chat POST request.
46
 
47
+ # Args:
48
+ # query (ReqData): The request data containing the query parameters.
49
 
50
+ # Returns:
51
+ # str: The generated response from the chat function.
52
+ # """
53
+ # # result = mail.get()
54
+ # return JSONResponse(content= result)
app/token.pickle DELETED
Binary file (1.01 kB)