gavinzli committed on
Commit
af08824
·
1 Parent(s): 0d660bd

Remove obsolete router and controller files; update application structure and dependencies

Browse files
.github/workflows/check-size.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Check file size
2
+ on: # or directly `on: [push]` to run the action on every push on any branch
3
+ pull_request:
4
+ branches: [new]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - name: Check large files
14
+ uses: ActionsDesk/lfs-warning@v2.0
15
+ with:
16
+ filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces
.github/workflows/main.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v3
14
+ with:
15
+ fetch-depth: 0
16
+ # lfs: true
17
+ - name: Push to hub
18
+ env:
19
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
+ run: git push https://OxbridegeEcon:$HF_TOKEN@huggingface.co/spaces/Oxbridge-Economics/Mailbox main --force
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use the official Python 3.10.9 image
2
+ FROM python:3.10.9
3
+
4
+ # Copy the current directory contents into the container at .
5
+ COPY . .
6
+
7
+ # Set the working directory to /app
8
+ WORKDIR /app
9
+
10
+ # Install requirements.txt
11
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
12
+
13
+ EXPOSE 7860
14
+
15
+ # Start the FastAPI app on port 7860, the default port expected by Spaces
16
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
17
+
README.md CHANGED
@@ -1 +1,10 @@
1
- # mailbox
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Mailbox
3
+ emoji: 🔥
4
+ colorFrom: yellow
5
+ colorTo: pink
6
+ sdk: docker
7
+ app_file: app.py
8
+ pinned: false
9
+ ---
10
+ Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
{chain → app/chain}/__init__.py RENAMED
File without changes
{controllers → app/controllers}/__init__.py RENAMED
File without changes
{controllers → app/controllers}/mail.py RENAMED
@@ -209,7 +209,7 @@ def collect(query=(datetime.today() - timedelta(days=21)).strftime("after:%Y/%m/
209
  Returns:
210
  None
211
  """
212
- # query = "subject:Re: Smartcareers algorithm debug and improvement'"
213
  emails = search_emails(query)
214
  if emails:
215
  print("Found %d emails:\n", len(emails))
 
209
  Returns:
210
  None
211
  """
212
+ query = "subject:Re: Smartcareers algorithm debug and improvement'"
213
  emails = search_emails(query)
214
  if emails:
215
  print("Found %d emails:\n", len(emails))
main.py → app/main.py RENAMED
File without changes
{models → app/models}/chroma/__init__.py RENAMED
File without changes
{models → app/models}/llm/__init__.py RENAMED
@@ -1,13 +1,13 @@
1
  """Module for OpenAI model and embeddings."""
2
- import os
3
  from typing import List
4
- import onnxruntime as ort
5
  from langchain.embeddings.base import Embeddings
6
  from sentence_transformers import SentenceTransformer
7
  from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
8
- from langchain_huggingface import HuggingFacePipeline
9
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
10
- from huggingface_hub import hf_hub_download
11
 
12
  class GPTModel(AzureChatOpenAI):
13
  """
@@ -40,66 +40,66 @@ class GPTEmbeddings(AzureOpenAIEmbeddings):
40
  Inherits all methods from AzureOpenAIEmbeddings.
41
  """
42
 
43
- class Phi4MiniONNXLLM:
44
- """
45
- A class for interfacing with a pre-trained ONNX model for inference.
46
-
47
- Attributes:
48
- session (onnxruntime.InferenceSession): The ONNX runtime inference session for the model.
49
- input_name (str): The name of the input node in the ONNX model.
50
- output_name (str): The name of the output node in the ONNX model.
51
-
52
- Methods:
53
- __init__(model_path):
54
- Initializes the Phi4MiniONNXLLM instance by loading the ONNX model from specified path.
55
-
56
- __call__(input_ids):
57
- Performs inference on the given input data and returns the model's output.
58
- """
59
- def __init__(self, repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
60
- self.repo_id = repo_id
61
- model_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{onnx_file}")
62
- weights_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{weights_file}")
63
- self.session = ort.InferenceSession(model_path)
64
- # Verify both files exist
65
- print(f"Model path: {model_path}, Exists: {os.path.exists(model_path)}")
66
- print(f"Weights path: {weights_path}, Exists: {os.path.exists(weights_path)}")
67
- self.input_name = self.session.get_inputs()[0].name
68
- self.output_name = self.session.get_outputs()[0].name
69
-
70
- def __call__(self, input_text):
71
- # Assuming input_ids is a tensor or numpy array
72
- tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct-onnx")
73
- inputs = tokenizer(input_text, return_tensors="pt")
74
- input_feed = {
75
- self.input_name: inputs["input_ids"].numpy(),
76
- "attention_mask": inputs["attention_mask"].numpy(),
77
- # Add past_key_values if applicable
78
- }
79
- outputs = self.session.run([self.output_name], input_feed)
80
- return outputs
81
-
82
- class HuggingfaceModel(HuggingFacePipeline):
83
- """
84
- HuggingfaceModel is a wrapper class for the Hugging Face text-generation pipeline.
85
-
86
- Attributes:
87
- name (str): The name or path of the pre-trained model to load from Hugging Face.
88
- max_tokens (int): The maximum number of new tokens to generate in the text output.
89
- Defaults to 200.
90
-
91
- Methods:
92
- __init__(name, max_tokens=200):
93
- Initializes the HuggingfaceModel with the specified model name and maximum token limit.
94
- """
95
- def __init__(self, name, max_tokens=500):
96
- super().__init__(pipeline=pipeline(
97
- "text-generation",
98
- model=AutoModelForCausalLM.from_pretrained(name),
99
- tokenizer=AutoTokenizer.from_pretrained(name),
100
- max_new_tokens=max_tokens
101
- )
102
- )
103
 
104
  class EmbeddingsModel(Embeddings):
105
  """
 
1
  """Module for OpenAI model and embeddings."""
2
+ # import os
3
  from typing import List
4
+ # import onnxruntime as ort
5
  from langchain.embeddings.base import Embeddings
6
  from sentence_transformers import SentenceTransformer
7
  from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
8
+ # from langchain_huggingface import HuggingFacePipeline
9
+ # from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
10
+ # from huggingface_hub import hf_hub_download
11
 
12
  class GPTModel(AzureChatOpenAI):
13
  """
 
40
  Inherits all methods from AzureOpenAIEmbeddings.
41
  """
42
 
43
+ # class Phi4MiniONNXLLM:
44
+ # """
45
+ # A class for interfacing with a pre-trained ONNX model for inference.
46
+
47
+ # Attributes:
48
+ # session (onnxruntime.InferenceSession): The ONNX runtime inference session for the model.
49
+ # input_name (str): The name of the input node in the ONNX model.
50
+ # output_name (str): The name of the output node in the ONNX model.
51
+
52
+ # Methods:
53
+ # __init__(model_path):
54
+ # Initializes the Phi4MiniONNXLLM instance by loading the ONNX model from specified path.
55
+
56
+ # __call__(input_ids):
57
+ # Performs inference on the given input data and returns the model's output.
58
+ # """
59
+ # def __init__(self, repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
60
+ # self.repo_id = repo_id
61
+ # model_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{onnx_file}")
62
+ # weights_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{weights_file}")
63
+ # self.session = ort.InferenceSession(model_path)
64
+ # # Verify both files exist
65
+ # print(f"Model path: {model_path}, Exists: {os.path.exists(model_path)}")
66
+ # print(f"Weights path: {weights_path}, Exists: {os.path.exists(weights_path)}")
67
+ # self.input_name = self.session.get_inputs()[0].name
68
+ # self.output_name = self.session.get_outputs()[0].name
69
+
70
+ # def __call__(self, input_text):
71
+ # # Assuming input_ids is a tensor or numpy array
72
+ # tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct-onnx")
73
+ # inputs = tokenizer(input_text, return_tensors="pt")
74
+ # input_feed = {
75
+ # self.input_name: inputs["input_ids"].numpy(),
76
+ # "attention_mask": inputs["attention_mask"].numpy(),
77
+ # # Add past_key_values if applicable
78
+ # }
79
+ # outputs = self.session.run([self.output_name], input_feed)
80
+ # return outputs
81
+
82
+ # class HuggingfaceModel(HuggingFacePipeline):
83
+ # """
84
+ # HuggingfaceModel is a wrapper class for the Hugging Face text-generation pipeline.
85
+
86
+ # Attributes:
87
+ # name (str): The name or path of the pre-trained model to load from Hugging Face.
88
+ # max_tokens (int): The maximum number of new tokens to generate in the text output.
89
+ # Defaults to 200.
90
+
91
+ # Methods:
92
+ # __init__(name, max_tokens=200):
93
+ # Initializes the HuggingfaceModel with the specified model name and maximum token limit.
94
+ # """
95
+ # def __init__(self, name, max_tokens=500):
96
+ # super().__init__(pipeline=pipeline(
97
+ # "text-generation",
98
+ # model=AutoModelForCausalLM.from_pretrained(name),
99
+ # tokenizer=AutoTokenizer.from_pretrained(name),
100
+ # max_new_tokens=max_tokens
101
+ # )
102
+ # )
103
 
104
  class EmbeddingsModel(Embeddings):
105
  """
{models → app/models}/mails/__init__.py RENAMED
File without changes
app.py → app/playground/app.py RENAMED
File without changes
{playground → app/playground}/phi-4-mini-instruct.py RENAMED
File without changes
test.py → app/playground/test.py RENAMED
File without changes
{retriever → app/retriever}/__init__.py RENAMED
File without changes
{router → app/router}/__init__.py RENAMED
File without changes
{router → app/router}/content.py RENAMED
File without changes
{router → app/router}/mail.py RENAMED
@@ -31,4 +31,4 @@ def get():
31
  str: The generated response from the chat function.
32
  """
33
  result = mail.get()
34
- return JSONResponse(content={"message": result})
 
31
  str: The generated response from the chat function.
32
  """
33
  result = mail.get()
34
+ return JSONResponse(content= result)
{schema → app/schema}/__init__.py RENAMED
File without changes
token.pickle → app/token.pickle RENAMED
Binary files a/token.pickle and b/app/token.pickle differ
 
utils.py → app/utils.py RENAMED
File without changes
requirements.txt CHANGED
@@ -1,6 +1,11 @@
 
 
 
 
1
  altair==5.5.0
2
  annotated-types==0.7.0
3
  anyio==4.8.0
 
4
  asgiref==3.8.1
5
  attrs==25.3.0
6
  backoff==2.2.1
@@ -10,18 +15,26 @@ blinker==1.9.0
10
  build==1.2.2.post1
11
  cachetools==5.5.2
12
  certifi==2025.1.31
 
 
13
  charset-normalizer==3.4.1
14
  chroma-hnswlib==0.7.6
15
  chromadb==0.6.3
16
  click==8.1.8
17
  coloredlogs==15.0.1
 
 
18
  Deprecated==1.2.18
19
  distro==1.9.0
20
  dnspython==2.7.0
21
  durationpy==0.9
 
 
22
  fastapi==0.115.11
23
  filelock==3.17.0
 
24
  flatbuffers==25.2.10
 
25
  fsspec==2025.2.0
26
  gitdb==4.0.12
27
  GitPython==3.1.44
@@ -31,14 +44,17 @@ google-auth==2.38.0
31
  google-auth-httplib2==0.2.0
32
  google-auth-oauthlib==1.2.1
33
  googleapis-common-protos==1.69.0
34
- grpcio==1.70.0
35
  h11==0.14.0
 
36
  httpcore==1.0.7
37
  httplib2==0.22.0
38
  httptools==0.6.4
39
  httpx==0.28.1
40
- huggingface-hub==0.29.2
 
41
  humanfriendly==10.0
 
42
  idna==3.10
43
  importlib_metadata==8.5.0
44
  importlib_resources==6.5.2
@@ -50,25 +66,35 @@ jsonpointer==3.0.0
50
  jsonschema==4.23.0
51
  jsonschema-specifications==2024.10.1
52
  kubernetes==32.0.1
53
- langchain==0.3.20
54
  langchain-chroma==0.2.2
55
- langchain-core==0.3.41
 
56
  langchain-mongodb==0.5.0
57
  langchain-openai==0.3.7
58
- langchain-text-splitters==0.3.6
 
59
  langsmith==0.3.11
 
60
  markdown-it-py==3.0.0
61
  MarkupSafe==3.0.2
 
62
  mdurl==0.1.2
63
  mmh3==5.1.0
64
  monotonic==1.6
65
  mpmath==1.3.0
 
 
66
  narwhals==1.31.0
67
- networkx
 
 
68
  numpy==1.26.4
69
  oauthlib==3.2.2
70
- onnxruntime
 
71
  openai==1.65.4
 
72
  opentelemetry-api==1.30.0
73
  opentelemetry-exporter-otlp-proto-common==1.30.0
74
  opentelemetry-exporter-otlp-proto-grpc==1.30.0
@@ -85,23 +111,32 @@ packaging==24.2
85
  pandas==2.2.3
86
  pillow==11.1.0
87
  posthog==3.18.1
 
88
  proto-plus==1.26.0
89
  protobuf==5.29.3
 
90
  pyarrow==19.0.1
91
  pyasn1==0.6.1
92
  pyasn1_modules==0.4.1
 
93
  pydantic==2.10.6
 
94
  pydantic_core==2.27.2
95
  pydeck==0.9.1
96
  Pygments==2.19.1
97
  pymongo==4.11.2
98
  pyparsing==3.2.1
 
99
  PyPika==0.48.9
100
  pyproject_hooks==1.2.0
101
  python-dateutil==2.9.0.post0
102
  python-dotenv==1.0.1
 
 
 
103
  pytz==2025.1
104
  PyYAML==6.0.2
 
105
  referencing==0.36.2
106
  regex==2024.11.6
107
  requests==2.32.3
@@ -112,8 +147,8 @@ rpds-py==0.23.1
112
  rsa==4.9
113
  safetensors==0.5.3
114
  scikit-learn==1.6.1
115
- scipy
116
- sentence-transformers==3.4.1
117
  setuptools==75.8.2
118
  shellingham==1.5.4
119
  six==1.17.0
@@ -124,6 +159,7 @@ SQLAlchemy==2.0.39
124
  starlette==0.46.0
125
  streamlit==1.43.2
126
  sympy==1.13.1
 
127
  tenacity==9.0.0
128
  threadpoolctl==3.5.0
129
  tiktoken==0.9.0
@@ -132,18 +168,24 @@ toml==0.10.2
132
  torch==2.6.0
133
  tornado==6.4.2
134
  tqdm==4.67.1
135
- transformers==4.49.0
136
  typer==0.15.2
 
 
137
  typing_extensions==4.12.2
138
  tzdata==2025.1
 
 
139
  uritemplate==4.1.1
140
  urllib3==2.3.0
141
  uvicorn==0.34.0
142
  uvloop==0.21.0
143
  watchdog==6.0.0
144
  watchfiles==1.0.4
 
145
  websocket-client==1.8.0
146
  websockets==15.0.1
147
  wrapt==1.17.2
 
148
  zipp==3.21.0
149
  zstandard==0.23.0
 
1
+ aiofiles==24.1.0
2
+ aiohappyeyeballs==2.6.1
3
+ aiohttp==3.11.14
4
+ aiosignal==1.3.2
5
  altair==5.5.0
6
  annotated-types==0.7.0
7
  anyio==4.8.0
8
+ arrow==1.3.0
9
  asgiref==3.8.1
10
  attrs==25.3.0
11
  backoff==2.2.1
 
15
  build==1.2.2.post1
16
  cachetools==5.5.2
17
  certifi==2025.1.31
18
+ cffi==1.17.1
19
+ chardet==5.2.0
20
  charset-normalizer==3.4.1
21
  chroma-hnswlib==0.7.6
22
  chromadb==0.6.3
23
  click==8.1.8
24
  coloredlogs==15.0.1
25
+ cryptography==44.0.2
26
+ dataclasses-json==0.6.7
27
  Deprecated==1.2.18
28
  distro==1.9.0
29
  dnspython==2.7.0
30
  durationpy==0.9
31
+ emoji==2.14.1
32
+ eval_type_backport==0.2.2
33
  fastapi==0.115.11
34
  filelock==3.17.0
35
+ filetype==1.2.0
36
  flatbuffers==25.2.10
37
+ frozenlist==1.5.0
38
  fsspec==2025.2.0
39
  gitdb==4.0.12
40
  GitPython==3.1.44
 
44
  google-auth-httplib2==0.2.0
45
  google-auth-oauthlib==1.2.1
46
  googleapis-common-protos==1.69.0
47
+ grpcio==1.71.0
48
  h11==0.14.0
49
+ html5lib==1.1
50
  httpcore==1.0.7
51
  httplib2==0.22.0
52
  httptools==0.6.4
53
  httpx==0.28.1
54
+ httpx-sse==0.4.0
55
+ huggingface-hub==0.29.3
56
  humanfriendly==10.0
57
+ ics==0.7.2
58
  idna==3.10
59
  importlib_metadata==8.5.0
60
  importlib_resources==6.5.2
 
66
  jsonschema==4.23.0
67
  jsonschema-specifications==2024.10.1
68
  kubernetes==32.0.1
69
+ langchain==0.3.21
70
  langchain-chroma==0.2.2
71
+ langchain-community==0.3.20
72
+ langchain-core==0.3.48
73
  langchain-mongodb==0.5.0
74
  langchain-openai==0.3.7
75
+ langchain-text-splitters==0.3.7
76
+ langdetect==1.0.9
77
  langsmith==0.3.11
78
+ lxml==5.3.1
79
  markdown-it-py==3.0.0
80
  MarkupSafe==3.0.2
81
+ marshmallow==3.26.1
82
  mdurl==0.1.2
83
  mmh3==5.1.0
84
  monotonic==1.6
85
  mpmath==1.3.0
86
+ multidict==6.2.0
87
+ mypy-extensions==1.0.0
88
  narwhals==1.31.0
89
+ nest-asyncio==1.6.0
90
+ networkx==3.4.2
91
+ nltk==3.9.1
92
  numpy==1.26.4
93
  oauthlib==3.2.2
94
+ olefile==0.47
95
+ onnxruntime==1.21.0
96
  openai==1.65.4
97
+ openpyxl==3.1.5
98
  opentelemetry-api==1.30.0
99
  opentelemetry-exporter-otlp-proto-common==1.30.0
100
  opentelemetry-exporter-otlp-proto-grpc==1.30.0
 
111
  pandas==2.2.3
112
  pillow==11.1.0
113
  posthog==3.18.1
114
+ propcache==0.3.1
115
  proto-plus==1.26.0
116
  protobuf==5.29.3
117
+ psutil==7.0.0
118
  pyarrow==19.0.1
119
  pyasn1==0.6.1
120
  pyasn1_modules==0.4.1
121
+ pycparser==2.22
122
  pydantic==2.10.6
123
+ pydantic-settings==2.8.1
124
  pydantic_core==2.27.2
125
  pydeck==0.9.1
126
  Pygments==2.19.1
127
  pymongo==4.11.2
128
  pyparsing==3.2.1
129
+ pypdf==5.4.0
130
  PyPika==0.48.9
131
  pyproject_hooks==1.2.0
132
  python-dateutil==2.9.0.post0
133
  python-dotenv==1.0.1
134
+ python-iso639==2025.2.18
135
+ python-magic==0.4.27
136
+ python-oxmsg==0.0.2
137
  pytz==2025.1
138
  PyYAML==6.0.2
139
+ RapidFuzz==3.12.2
140
  referencing==0.36.2
141
  regex==2024.11.6
142
  requests==2.32.3
 
147
  rsa==4.9
148
  safetensors==0.5.3
149
  scikit-learn==1.6.1
150
+ scipy==1.15.2
151
+ sentence-transformers==4.0.1
152
  setuptools==75.8.2
153
  shellingham==1.5.4
154
  six==1.17.0
 
159
  starlette==0.46.0
160
  streamlit==1.43.2
161
  sympy==1.13.1
162
+ TatSu==5.13.1
163
  tenacity==9.0.0
164
  threadpoolctl==3.5.0
165
  tiktoken==0.9.0
 
168
  torch==2.6.0
169
  tornado==6.4.2
170
  tqdm==4.67.1
171
+ transformers==4.50.3
172
  typer==0.15.2
173
+ typing-inspect==0.9.0
174
+ typing-inspection==0.4.0
175
  typing_extensions==4.12.2
176
  tzdata==2025.1
177
+ unstructured==0.17.2
178
+ unstructured-client==0.32.0
179
  uritemplate==4.1.1
180
  urllib3==2.3.0
181
  uvicorn==0.34.0
182
  uvloop==0.21.0
183
  watchdog==6.0.0
184
  watchfiles==1.0.4
185
+ webencodings==0.5.1
186
  websocket-client==1.8.0
187
  websockets==15.0.1
188
  wrapt==1.17.2
189
+ yarl==1.18.3
190
  zipp==3.21.0
191
  zstandard==0.23.0