Spaces:
Running
Running
Implement OAuth 2.0 authentication flow and session management; remove token.pickle file
Browse files- app/controllers/mail.py +37 -39
- app/main.py +61 -7
- app/router/auth.py +75 -0
- app/router/mail.py +34 -14
- app/token.pickle +0 -0
app/controllers/mail.py
CHANGED
@@ -6,7 +6,7 @@ from datetime import datetime, timedelta
|
|
6 |
from venv import logger
|
7 |
from ics import Calendar
|
8 |
|
9 |
-
import pandas as pd
|
10 |
from langchain_core.documents import Document
|
11 |
from langchain_community.document_loaders import (
|
12 |
PyPDFLoader,
|
@@ -16,7 +16,7 @@ from langchain_community.document_loaders import (
|
|
16 |
)
|
17 |
|
18 |
from models.db import vectorstore
|
19 |
-
from models.mails import build_gmail_service
|
20 |
|
21 |
SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]
|
22 |
EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
|
@@ -24,10 +24,8 @@ EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
|
|
24 |
ATTACHMENTS_DIR = "cache"
|
25 |
os.makedirs(ATTACHMENTS_DIR, exist_ok=True)
|
26 |
|
27 |
-
service = build_gmail_service()
|
28 |
-
|
29 |
-
|
30 |
-
def search_emails(query):
|
31 |
"""Search emails based on a query."""
|
32 |
result = service.users().messages().list(userId="me", q=query).execute()
|
33 |
messages = []
|
@@ -43,13 +41,13 @@ def search_emails(query):
|
|
43 |
return messages
|
44 |
|
45 |
|
46 |
-
def list_emails(messages):
|
47 |
"""
|
48 |
Processes a list of email messages, extracts metadata, decodes content, and handles attachments.
|
49 |
|
50 |
Args:
|
51 |
messages (list): A list of email message dictionaries, where each dictionary contains
|
52 |
-
|
53 |
|
54 |
Returns:
|
55 |
None: The function processes the emails and adds the extracted documents to a vector store.
|
@@ -94,7 +92,7 @@ def list_emails(messages):
|
|
94 |
)
|
95 |
metadata["user_id"] = service.users().getProfile(userId="me").execute().get("emailAddress")
|
96 |
metadata["msg_id"] = msg["id"]
|
97 |
-
print(metadata, msg["payload"]["mimeType"])
|
98 |
ids = []
|
99 |
documents = []
|
100 |
mime_types = []
|
@@ -199,7 +197,7 @@ def list_emails(messages):
|
|
199 |
vectorstore.add_documents(documents=documents, ids=ids)
|
200 |
|
201 |
|
202 |
-
def collect(query=(datetime.today() - timedelta(days=21)).strftime("after:%Y/%m/%d")):
|
203 |
"""
|
204 |
Main function to search and list emails from Gmail.
|
205 |
|
@@ -211,47 +209,47 @@ def collect(query=(datetime.today() - timedelta(days=21)).strftime("after:%Y/%m/
|
|
211 |
None
|
212 |
"""
|
213 |
query = "subject:Re: Smartcareers algorithm debug and improvement'"
|
214 |
-
emails = search_emails(query)
|
215 |
if emails:
|
216 |
print("Found %d emails:\n", len(emails))
|
217 |
logger.info("Found %d emails after two_weeks_ago:\n", len(emails))
|
218 |
-
list_emails(emails)
|
219 |
logger.info("Listing emails...")
|
220 |
return f"{len(emails)} emails added to the collection."
|
221 |
else:
|
222 |
logger.info("No emails found after two weeks ago.")
|
223 |
|
224 |
|
225 |
-
def get_documents():
|
226 |
-
|
227 |
-
|
228 |
|
229 |
-
|
230 |
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
|
243 |
|
244 |
-
def get():
|
245 |
-
|
246 |
-
|
247 |
|
248 |
-
|
249 |
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
|
|
6 |
from venv import logger
|
7 |
from ics import Calendar
|
8 |
|
9 |
+
# import pandas as pd
|
10 |
from langchain_core.documents import Document
|
11 |
from langchain_community.document_loaders import (
|
12 |
PyPDFLoader,
|
|
|
16 |
)
|
17 |
|
18 |
from models.db import vectorstore
|
19 |
+
# from models.mails import build_gmail_service
|
20 |
|
21 |
SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]
|
22 |
EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
|
|
|
24 |
ATTACHMENTS_DIR = "cache"
|
25 |
os.makedirs(ATTACHMENTS_DIR, exist_ok=True)
|
26 |
|
27 |
+
# service = build_gmail_service()
|
28 |
+
def search_emails(service, query):
|
|
|
|
|
29 |
"""Search emails based on a query."""
|
30 |
result = service.users().messages().list(userId="me", q=query).execute()
|
31 |
messages = []
|
|
|
41 |
return messages
|
42 |
|
43 |
|
44 |
+
def list_emails(service, messages):
|
45 |
"""
|
46 |
Processes a list of email messages, extracts metadata, decodes content, and handles attachments.
|
47 |
|
48 |
Args:
|
49 |
messages (list): A list of email message dictionaries, where each dictionary contains
|
50 |
+
at least an 'id' key representing the email's unique identifier.
|
51 |
|
52 |
Returns:
|
53 |
None: The function processes the emails and adds the extracted documents to a vector store.
|
|
|
92 |
)
|
93 |
metadata["user_id"] = service.users().getProfile(userId="me").execute().get("emailAddress")
|
94 |
metadata["msg_id"] = msg["id"]
|
95 |
+
# print(metadata, msg["payload"]["mimeType"])
|
96 |
ids = []
|
97 |
documents = []
|
98 |
mime_types = []
|
|
|
197 |
vectorstore.add_documents(documents=documents, ids=ids)
|
198 |
|
199 |
|
200 |
+
def collect(service, query=None):
    """
    Search Gmail for messages matching a query and add them to the vector store.

    Args:
        service: Gmail API client, passed through unchanged to
            ``search_emails`` and ``list_emails``.
        query (str | None): Gmail search query. When omitted, it defaults to
            ``after:<date 21 days ago>``, computed at call time so a
            long-running process does not keep using the date from import.

    Returns:
        str | None: A summary message when at least one email was found and
        processed, otherwise None.
    """
    if query is None:
        # Built per call: a default evaluated in the signature is frozen at
        # import time and drifts further into the past the longer the app runs.
        query = (datetime.today() - timedelta(days=21)).strftime("after:%Y/%m/%d")

    # The caller's query is honoured as given; the previous hard-coded debug
    # subject filter silently overrode it on every call.
    emails = search_emails(service, query)
    if not emails:
        logger.info("No emails found after two weeks ago.")
        return None

    print(f"Found {len(emails)} emails:\n")
    logger.info("Found %d emails after two_weeks_ago:\n", len(emails))
    # Log before the work starts so the message reflects what is happening.
    logger.info("Listing emails...")
    list_emails(service, emails)
    return f"{len(emails)} emails added to the collection."
|
221 |
|
222 |
|
223 |
+
# def get_documents(self):
|
224 |
+
# """
|
225 |
+
# Main function to list emails from the database.
|
226 |
|
227 |
+
# This function lists all emails stored in the database.
|
228 |
|
229 |
+
# Returns:
|
230 |
+
# None
|
231 |
+
# """
|
232 |
+
# data = vectorstore.get()
|
233 |
+
# df = pd.DataFrame(
|
234 |
+
# {"ids": data["ids"], "documents": data["documents"], "metadatas": data["metadatas"]}
|
235 |
+
# )
|
236 |
+
# df.to_excel("collection_data.xlsx", index=False)
|
237 |
+
# df = pd.concat(
|
238 |
+
# [df.drop("metadatas", axis=1), df["metadatas"].apply(pd.Series)], axis=1
|
239 |
+
# ).to_excel("collection_data_expand.xlsx", index=False)
|
240 |
|
241 |
|
242 |
+
# def get(self):
|
243 |
+
# """
|
244 |
+
# Main function to list emails from the database.
|
245 |
|
246 |
+
# This function lists all emails stored in the database.
|
247 |
|
248 |
+
# Returns:
|
249 |
+
# None
|
250 |
+
# """
|
251 |
+
# data = vectorstore.get()
|
252 |
+
# df = pd.DataFrame(
|
253 |
+
# {"id": data["ids"], "documents": data["documents"], "metadatas": data["metadatas"]}
|
254 |
+
# )
|
255 |
+
# return df.to_dict(orient="records")
|
app/main.py
CHANGED
@@ -1,14 +1,72 @@
|
|
1 |
"""Module to handle the main FastAPI application and its endpoints."""
|
2 |
import logging
|
3 |
-
|
|
|
4 |
from fastapi.middleware.cors import CORSMiddleware
|
5 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
app = FastAPI(docs_url="/")
|
9 |
|
10 |
app.include_router(content.router)
|
11 |
app.include_router(mail.router)
|
|
|
12 |
|
13 |
origins = [
|
14 |
"*"
|
@@ -21,11 +79,7 @@ app.add_middleware(
|
|
21 |
allow_methods=["*"],
|
22 |
allow_headers=["*"],
|
23 |
)
|
24 |
-
|
25 |
-
logging.basicConfig(
|
26 |
-
format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
|
27 |
-
logging.getLogger().setLevel(logging.ERROR)
|
28 |
-
|
29 |
|
30 |
@app.get("/_health")
|
31 |
def health():
|
|
|
1 |
"""Module to handle the main FastAPI application and its endpoints."""
|
2 |
import logging
|
3 |
+
|
4 |
+
from fastapi import FastAPI, Request
|
5 |
from fastapi.middleware.cors import CORSMiddleware
|
6 |
+
from jose import jwt
|
7 |
+
from router import auth, content, mail
|
8 |
+
from starlette.middleware.base import BaseHTTPMiddleware
|
9 |
+
|
10 |
+
import os
import time

# Signing key for the session JWT. It is read from the environment so the real
# key does not live in source control; the literal fallback only keeps existing
# setups working and must not be relied on in production.
SECRET_KEY = os.environ.get("SECRET_KEY", "your-secret-key")
ALGORITHM = "HS256"
# Lifetime in seconds, shared by the cookie's max_age and the JWT "exp" claim.
SESSION_MAX_AGE = 3600

if SECRET_KEY == "your-secret-key":
    # A named logger is used on purpose: logging.warning() would implicitly
    # call basicConfig() and turn the later explicit basicConfig() into a no-op.
    logging.getLogger(__name__).warning(
        "SECRET_KEY is not set; session tokens are signed with a placeholder key."
    )


class SessionMiddleware(BaseHTTPMiddleware):
    """
    Middleware that keeps per-client session data in a JWT stored in a cookie.

    On each request the "session_token" cookie is read. If it holds a JWT that
    decodes with SECRET_KEY/ALGORITHM, its claims become ``request.state.session``;
    if the cookie is missing, invalid, or expired, the session starts empty.
    Decoding errors are swallowed here and never propagate to the caller.

    After the downstream handler has run, the (possibly modified) session is
    re-encoded together with an "exp" claim and written back as an HTTP-only
    "session_token" cookie with a max age of SESSION_MAX_AGE seconds.

    NOTE(review): the token is signed, not encrypted, so anything placed in the
    session can be read by the client. The cookie is also issued without the
    ``secure`` flag; consider enabling it once the app is served over HTTPS only.
    """

    async def dispatch(self, request: Request, call_next):
        """
        Attach the decoded session to the request and refresh the cookie.

        Args:
            request (Request): Incoming request; ``request.state.session`` is set
                to a dict before the handler runs.
            call_next: Callable that forwards the request to the next handler.

        Returns:
            The downstream response with an updated "session_token" cookie.
        """
        session_data = {}
        session_token = request.cookies.get("session_token")
        if session_token:
            try:
                session_data = jwt.decode(session_token, SECRET_KEY, algorithms=[ALGORITHM])
            except jwt.JWTError:
                # Covers bad signatures, malformed tokens, and expired tokens.
                session_data = {}
            # "exp" is bookkeeping owned by this middleware, not session content.
            session_data.pop("exp", None)
        request.state.session = session_data

        response = await call_next(request)

        # Without a server-validated expiry a captured cookie stays usable
        # forever; max_age alone is only a hint to the browser.
        payload = dict(request.state.session)
        payload["exp"] = int(time.time()) + SESSION_MAX_AGE
        session_token = jwt.encode(payload, SECRET_KEY, algorithm=ALGORITHM)
        response.set_cookie(
            key="session_token",
            value=session_token,
            httponly=True,
            max_age=SESSION_MAX_AGE,
        )
        return response
|
60 |
+
|
61 |
+
logging.basicConfig(
|
62 |
+
format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
|
63 |
+
logging.getLogger().setLevel(logging.ERROR)
|
64 |
|
65 |
app = FastAPI(docs_url="/")
|
66 |
|
67 |
app.include_router(content.router)
|
68 |
app.include_router(mail.router)
|
69 |
+
app.include_router(auth.router)
|
70 |
|
71 |
origins = [
|
72 |
"*"
|
|
|
79 |
allow_methods=["*"],
|
80 |
allow_headers=["*"],
|
81 |
)
|
82 |
+
app.add_middleware(SessionMiddleware)
|
|
|
|
|
|
|
|
|
83 |
|
84 |
@app.get("/_health")
|
85 |
def health():
|
app/router/auth.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Module for defining the main routes of the API."""
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
import pickle
|
5 |
+
from fastapi import APIRouter, Request
|
6 |
+
from fastapi.responses import JSONResponse
|
7 |
+
from google_auth_oauthlib.flow import InstalledAppFlow
|
8 |
+
from googleapiclient.discovery import build
|
9 |
+
|
10 |
+
router = APIRouter(tags=["auth"])
|
11 |
+
|
12 |
+
CLIENT_ID = os.environ.get("CLIENT_ID")
|
13 |
+
CLIENT_SECRET = os.environ.get("CLIENT_SECRET")
|
14 |
+
REDIRECT_URI = os.environ.get("REDIRECT_URI")
|
15 |
+
|
16 |
+
SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]
|
17 |
+
|
18 |
+
# Client config for OAuth flow
|
19 |
+
CLIENT_CONFIG = {
|
20 |
+
"web": {
|
21 |
+
"client_id": CLIENT_ID,
|
22 |
+
"client_secret": CLIENT_SECRET,
|
23 |
+
"redirect_uris": [REDIRECT_URI],
|
24 |
+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
25 |
+
"token_uri": "https://oauth2.googleapis.com/token",
|
26 |
+
}
|
27 |
+
}
|
28 |
+
|
29 |
+
@router.get("/auth/google/url")
async def get_auth_url():
    """
    Build the Google OAuth 2.0 authorization URL for this application.

    A flow is created from CLIENT_CONFIG and SCOPES, pointed at REDIRECT_URI,
    and asked for an authorization URL that requests offline access and always
    shows the consent prompt. The state value returned alongside the URL is
    discarded.

    Returns:
        JSONResponse: A JSON object with the authorization URL under the key "url".
    """
    oauth_flow = InstalledAppFlow.from_client_config(CLIENT_CONFIG, SCOPES)
    oauth_flow.redirect_uri = REDIRECT_URI
    url_and_state = oauth_flow.authorization_url(access_type="offline", prompt="consent")
    return JSONResponse({"url": url_and_state[0]})
|
45 |
+
|
46 |
+
def _exchange_code_for_profile(code: str):
    """
    Exchange an authorization code for credentials and look up the Gmail profile.

    This helper is synchronous: it performs the token exchange, the Gmail
    ``getProfile`` call, and the pickle write, all of which block.

    Args:
        code (str): Authorization code received on the OAuth redirect.

    Returns:
        tuple: ``(credentials, profile)`` where ``profile`` is the response of
        the Gmail ``users().getProfile`` call for the authenticated user.
    """
    flow = InstalledAppFlow.from_client_config(CLIENT_CONFIG, SCOPES)
    flow.redirect_uri = REDIRECT_URI
    flow.fetch_token(code=code)
    credentials = flow.credentials

    service = build("gmail", "v1", credentials=credentials)
    profile = service.users().getProfile(userId="me").execute()

    # The credentials are persisted as "<emailAddress>.pickle" in the working
    # directory; the mail route looks the file up by the same naming scheme.
    with open(f"{profile['emailAddress']}.pickle", "wb") as token_file:
        pickle.dump(credentials, token_file)
    return credentials, profile


@router.get("/auth/google/callback")
async def google_callback(code: str, request: Request):
    """
    Handle the Google OAuth 2.0 redirect and establish the user's session.

    The authorization code is exchanged for credentials, the credentials are
    stored both in the session (as a JSON-compatible dict under "credential")
    and in a per-user pickle file, and the user's Gmail profile is returned.

    Args:
        code (str): Authorization code supplied by Google as a query parameter.
        request (Request): Current request; its ``state.session`` dict is updated.

    Returns:
        JSONResponse: The Gmail profile of the authenticated user.

    NOTE(review): the session dict is whatever ``credentials.to_json()`` yields;
    presumably that includes the refresh token and client secret. Confirm how
    the session is transported before exposing it to clients.
    """
    from fastapi.concurrency import run_in_threadpool

    # The token exchange and Gmail call are blocking; running them directly in
    # this coroutine would stall the event loop for every other request.
    credentials, profile = await run_in_threadpool(_exchange_code_for_profile, code)
    request.state.session["credential"] = json.loads(credentials.to_json())
    return JSONResponse(profile)
|
app/router/mail.py
CHANGED
@@ -1,12 +1,17 @@
|
|
1 |
"""Module for defining the main routes of the API."""
|
2 |
-
|
|
|
|
|
3 |
from fastapi.responses import JSONResponse
|
|
|
4 |
from controllers import mail
|
|
|
|
|
5 |
|
6 |
router = APIRouter(prefix="/mail", tags=["mail"])
|
7 |
|
8 |
@router.post("")
|
9 |
-
def collect():
|
10 |
"""
|
11 |
Handles the chat POST request.
|
12 |
|
@@ -16,19 +21,34 @@ def collect():
|
|
16 |
Returns:
|
17 |
str: The generated response from the chat function.
|
18 |
"""
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
return JSONResponse(content={"message": "Mail collected successfully."})
|
21 |
|
22 |
-
@router.get("")
|
23 |
-
def get():
|
24 |
-
|
25 |
-
|
26 |
|
27 |
-
|
28 |
-
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
1 |
"""Module for defining the main routes of the API."""
|
2 |
+
import os
|
3 |
+
import pickle
|
4 |
+
from fastapi import APIRouter, Request
|
5 |
from fastapi.responses import JSONResponse
|
6 |
+
|
7 |
from controllers import mail
|
8 |
+
from google.oauth2.credentials import Credentials
|
9 |
+
from googleapiclient.discovery import build
|
10 |
|
11 |
router = APIRouter(prefix="/mail", tags=["mail"])
|
12 |
|
13 |
@router.post("")
def collect(emailAddress: str, request: Request):
    """
    Collect the given user's Gmail messages into the mail collection.

    Credentials are loaded from "<emailAddress>.pickle" when that file exists;
    otherwise they are rebuilt from the "credential" entry of the session.
    A Gmail client is then built and handed to ``mail.collect``.

    Args:
        emailAddress (str): Address identifying whose stored credentials to use.
        request (Request): Current request; ``state.session`` is read for the
            fallback credentials.

    Returns:
        JSONResponse: A success message; status 400 when ``emailAddress`` is not
        a plain email address; status 401 when no credentials are available.

    NOTE(review): any caller who knows an address can use that user's stored
    credentials; the address is not checked against the session. Unpickling is
    only acceptable while the pickle files are written by this application alone.
    """
    import re

    # emailAddress comes from the client and ends up in a file path that is
    # unpickled, so restrict it to plain address characters (no separators).
    if not re.fullmatch(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", emailAddress):
        return JSONResponse(status_code=400, content={"message": "Invalid email address."})

    token_path = f"{emailAddress}.pickle"
    if os.path.exists(token_path):
        with open(token_path, "rb") as token_file:
            credentials = pickle.load(token_file)
    else:
        cred_dict = request.state.session.get("credential")
        if not cred_dict:
            # Previously this fell through to None["token"] and a 500 error.
            return JSONResponse(
                status_code=401,
                content={"message": "No credentials found; authenticate first."},
            )
        credentials = Credentials(
            token=cred_dict["token"],
            refresh_token=cred_dict["refresh_token"],
            token_uri=cred_dict["token_uri"],
            client_id=cred_dict["client_id"],
            client_secret=cred_dict["client_secret"],
            scopes=cred_dict["scopes"],
        )

    mailservice = build("gmail", "v1", credentials=credentials)
    mail.collect(mailservice)
    return JSONResponse(content={"message": "Mail collected successfully."})
|
41 |
|
42 |
+
# @router.get("")
|
43 |
+
# def get():
|
44 |
+
# """
|
45 |
+
# Handles the chat POST request.
|
46 |
|
47 |
+
# Args:
|
48 |
+
# query (ReqData): The request data containing the query parameters.
|
49 |
|
50 |
+
# Returns:
|
51 |
+
# str: The generated response from the chat function.
|
52 |
+
# """
|
53 |
+
# # result = mail.get()
|
54 |
+
# return JSONResponse(content= result)
|
app/token.pickle
DELETED
Binary file (1.01 kB)
|
|