Spaces:

Oxbridge-Economics
/

Mailbox

Running

App Files Files Community

gavinzli committed on Mar 30

Commit

af08824

1 Parent(s): 0d660bd

Remove obsolete router and controller files; update application structure and dependencies

Browse files

Files changed (22) hide show

.github/workflows/check-size.yml +16 -0
.github/workflows/main.yml +20 -0
Dockerfile +17 -0
README.md +10 -1
{chain → app/chain}/__init__.py +0 -0
{controllers → app/controllers}/__init__.py +0 -0
{controllers → app/controllers}/mail.py +1 -1
main.py → app/main.py +0 -0
{models → app/models}/chroma/__init__.py +0 -0
{models → app/models}/llm/__init__.py +65 -65
{models → app/models}/mails/__init__.py +0 -0
app.py → app/playground/app.py +0 -0
{playground → app/playground}/phi-4-mini-instruct.py +0 -0
test.py → app/playground/test.py +0 -0
{retriever → app/retriever}/__init__.py +0 -0
{router → app/router}/__init__.py +0 -0
{router → app/router}/content.py +0 -0
{router → app/router}/mail.py +1 -1
{schema → app/schema}/__init__.py +0 -0
token.pickle → app/token.pickle +0 -0
utils.py → app/utils.py +0 -0
requirements.txt +52 -10

.github/workflows/check-size.yml ADDED Viewed

	@@ -0,0 +1,16 @@

+name: Check file size
+on:               # or directly `on: [push]` to run the action on every push on any branch
+  pull_request:
+    branches: [new]
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check large files
+        uses: ActionsDesk/lfs-warning@v2.0
+        with:
+          filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces

.github/workflows/main.yml ADDED Viewed

	@@ -0,0 +1,20 @@

+name: Sync to Hugging Face hub
+on:
+  push:
+    branches: [main]
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          # lfs: true
+      - name: Push to hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push https://OxbridegeEcon:$HF_TOKEN@huggingface.co/spaces/Oxbridge-Economics/Mailbox main --force

Dockerfile ADDED Viewed

	@@ -0,0 +1,17 @@

+# Use the official Python 3.10.9 image
+FROM python:3.10.9
+# Copy the current directory contents into the container at .
+COPY . .
+# Set the working directory to /app
+WORKDIR /app
+# Install requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+EXPOSE 7860
+# Start the FastAPI app on port 7860, the default port expected by Spaces
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

	@@ -1 +1,10 @@
1	- ~~# mailbox~~

+---
+title: Mailbox
+emoji: 🔥
+colorFrom: yellow
+colorTo: pink
+sdk: docker
+app_file: app.py
+pinned: false
+---
+Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>

{chain → app/chain}/__init__.py RENAMED Viewed

File without changes

{controllers → app/controllers}/__init__.py RENAMED Viewed

File without changes

{controllers → app/controllers}/mail.py RENAMED Viewed

@@ -209,7 +209,7 @@ def collect(query=(datetime.today() - timedelta(days=21)).strftime("after:%Y/%m/
     Returns:
         None
     """
-    # query = "subject:Re: Smartcareers algorithm debug and improvement'"
     emails = search_emails(query)
     if emails:
         print("Found %d emails:\n", len(emails))

     Returns:
         None
     """
+    query = "subject:Re: Smartcareers algorithm debug and improvement'"
     emails = search_emails(query)
     if emails:
         print("Found %d emails:\n", len(emails))

main.py → app/main.py RENAMED Viewed

File without changes

{models → app/models}/chroma/__init__.py RENAMED Viewed

File without changes

{models → app/models}/llm/__init__.py RENAMED Viewed

@@ -1,13 +1,13 @@
 """Module for OpenAI model and embeddings."""
-import os
 from typing import List
-import onnxruntime as ort
 from langchain.embeddings.base import Embeddings
 from sentence_transformers import SentenceTransformer
 from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
-from langchain_huggingface import HuggingFacePipeline
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from huggingface_hub import hf_hub_download
 class GPTModel(AzureChatOpenAI):
     """
@@ -40,66 +40,66 @@ class GPTEmbeddings(AzureOpenAIEmbeddings):
         Inherits all methods from AzureOpenAIEmbeddings.
     """
-class Phi4MiniONNXLLM:
-    """
-    A class for interfacing with a pre-trained ONNX model for inference.
-    Attributes:
-        session (onnxruntime.InferenceSession): The ONNX runtime inference session for the model.
-        input_name (str): The name of the input node in the ONNX model.
-        output_name (str): The name of the output node in the ONNX model.
-    Methods:
-        __init__(model_path):
-            Initializes the Phi4MiniONNXLLM instance by loading the ONNX model from specified path.
-        __call__(input_ids):
-            Performs inference on the given input data and returns the model's output.
-    """
-    def __init__(self, repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
-        self.repo_id = repo_id
-        model_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{onnx_file}")
-        weights_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{weights_file}")
-        self.session = ort.InferenceSession(model_path)
-        # Verify both files exist
-        print(f"Model path: {model_path}, Exists: {os.path.exists(model_path)}")
-        print(f"Weights path: {weights_path}, Exists: {os.path.exists(weights_path)}")
-        self.input_name = self.session.get_inputs()[0].name
-        self.output_name = self.session.get_outputs()[0].name
-    def __call__(self, input_text):
-        # Assuming input_ids is a tensor or numpy array
-        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct-onnx")
-        inputs = tokenizer(input_text, return_tensors="pt")
-        input_feed = {
-            self.input_name: inputs["input_ids"].numpy(),
-            "attention_mask": inputs["attention_mask"].numpy(),
-            # Add past_key_values if applicable
-        }
-        outputs = self.session.run([self.output_name], input_feed)
-        return outputs
-class HuggingfaceModel(HuggingFacePipeline):
-    """
-    HuggingfaceModel is a wrapper class for the Hugging Face text-generation pipeline.
-    Attributes:
-        name (str): The name or path of the pre-trained model to load from Hugging Face.
-        max_tokens (int): The maximum number of new tokens to generate in the text output.
-        Defaults to 200.
-    Methods:
-        __init__(name, max_tokens=200):
-            Initializes the HuggingfaceModel with the specified model name and maximum token limit.
-    """
-    def __init__(self, name, max_tokens=500):
-        super().__init__(pipeline=pipeline(
-            "text-generation",
-            model=AutoModelForCausalLM.from_pretrained(name),
-            tokenizer=AutoTokenizer.from_pretrained(name),
-            max_new_tokens=max_tokens
-            )
-        )
 class EmbeddingsModel(Embeddings):
     """

 """Module for OpenAI model and embeddings."""
+# import os
 from typing import List
+# import onnxruntime as ort
 from langchain.embeddings.base import Embeddings
 from sentence_transformers import SentenceTransformer
 from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
+# from langchain_huggingface import HuggingFacePipeline
+# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+# from huggingface_hub import hf_hub_download
 class GPTModel(AzureChatOpenAI):
     """
         Inherits all methods from AzureOpenAIEmbeddings.
     """
+# class Phi4MiniONNXLLM:
+#     """
+#     A class for interfacing with a pre-trained ONNX model for inference.
+#     Attributes:
+#         session (onnxruntime.InferenceSession): The ONNX runtime inference session for the model.
+#         input_name (str): The name of the input node in the ONNX model.
+#         output_name (str): The name of the output node in the ONNX model.
+#     Methods:
+#         __init__(model_path):
+#             Initializes the Phi4MiniONNXLLM instance by loading the ONNX model from specified path.
+#         __call__(input_ids):
+#             Performs inference on the given input data and returns the model's output.
+#     """
+#     def __init__(self, repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
+#         self.repo_id = repo_id
+#         model_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{onnx_file}")
+#         weights_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{weights_file}")
+#         self.session = ort.InferenceSession(model_path)
+#         # Verify both files exist
+#         print(f"Model path: {model_path}, Exists: {os.path.exists(model_path)}")
+#         print(f"Weights path: {weights_path}, Exists: {os.path.exists(weights_path)}")
+#         self.input_name = self.session.get_inputs()[0].name
+#         self.output_name = self.session.get_outputs()[0].name
+#     def __call__(self, input_text):
+#         # Assuming input_ids is a tensor or numpy array
+#         tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct-onnx")
+#         inputs = tokenizer(input_text, return_tensors="pt")
+#         input_feed = {
+#             self.input_name: inputs["input_ids"].numpy(),
+#             "attention_mask": inputs["attention_mask"].numpy(),
+#             # Add past_key_values if applicable
+#         }
+#         outputs = self.session.run([self.output_name], input_feed)
+#         return outputs
+# class HuggingfaceModel(HuggingFacePipeline):
+#     """
+#     HuggingfaceModel is a wrapper class for the Hugging Face text-generation pipeline.
+#     Attributes:
+#         name (str): The name or path of the pre-trained model to load from Hugging Face.
+#         max_tokens (int): The maximum number of new tokens to generate in the text output.
+#         Defaults to 500.
+#     Methods:
+#         __init__(name, max_tokens=500):
+#             Initializes the HuggingfaceModel with the specified model name and maximum token limit.
+#     """
+#     def __init__(self, name, max_tokens=500):
+#         super().__init__(pipeline=pipeline(
+#             "text-generation",
+#             model=AutoModelForCausalLM.from_pretrained(name),
+#             tokenizer=AutoTokenizer.from_pretrained(name),
+#             max_new_tokens=max_tokens
+#             )
+#         )
 class EmbeddingsModel(Embeddings):
     """

{models → app/models}/mails/__init__.py RENAMED Viewed

File without changes

app.py → app/playground/app.py RENAMED Viewed

File without changes

{playground → app/playground}/phi-4-mini-instruct.py RENAMED Viewed

File without changes

test.py → app/playground/test.py RENAMED Viewed

File without changes

{retriever → app/retriever}/__init__.py RENAMED Viewed

File without changes

{router → app/router}/__init__.py RENAMED Viewed

File without changes

{router → app/router}/content.py RENAMED Viewed

File without changes

{router → app/router}/mail.py RENAMED Viewed

@@ -31,4 +31,4 @@ def get():
         str: The generated response from the chat function.
     """
     result = mail.get()
-    return JSONResponse(content={"message": result})

         str: The generated response from the chat function.
     """
     result = mail.get()
+    return JSONResponse(content= result)

{schema → app/schema}/__init__.py RENAMED Viewed

File without changes

token.pickle → app/token.pickle RENAMED Viewed

Binary files a/token.pickle and b/app/token.pickle differ

utils.py → app/utils.py RENAMED Viewed

File without changes

requirements.txt CHANGED Viewed

@@ -1,6 +1,11 @@
 altair==5.5.0
 annotated-types==0.7.0
 anyio==4.8.0
 asgiref==3.8.1
 attrs==25.3.0
 backoff==2.2.1
@@ -10,18 +15,26 @@ blinker==1.9.0
 build==1.2.2.post1
 cachetools==5.5.2
 certifi==2025.1.31
 charset-normalizer==3.4.1
 chroma-hnswlib==0.7.6
 chromadb==0.6.3
 click==8.1.8
 coloredlogs==15.0.1
 Deprecated==1.2.18
 distro==1.9.0
 dnspython==2.7.0
 durationpy==0.9
 fastapi==0.115.11
 filelock==3.17.0
 flatbuffers==25.2.10
 fsspec==2025.2.0
 gitdb==4.0.12
 GitPython==3.1.44
@@ -31,14 +44,17 @@ google-auth==2.38.0
 google-auth-httplib2==0.2.0
 google-auth-oauthlib==1.2.1
 googleapis-common-protos==1.69.0
-grpcio==1.70.0
 h11==0.14.0
 httpcore==1.0.7
 httplib2==0.22.0
 httptools==0.6.4
 httpx==0.28.1
-huggingface-hub==0.29.2
 humanfriendly==10.0
 idna==3.10
 importlib_metadata==8.5.0
 importlib_resources==6.5.2
@@ -50,25 +66,35 @@ jsonpointer==3.0.0
 jsonschema==4.23.0
 jsonschema-specifications==2024.10.1
 kubernetes==32.0.1
-langchain==0.3.20
 langchain-chroma==0.2.2
-langchain-core==0.3.41
 langchain-mongodb==0.5.0
 langchain-openai==0.3.7
-langchain-text-splitters==0.3.6
 langsmith==0.3.11
 markdown-it-py==3.0.0
 MarkupSafe==3.0.2
 mdurl==0.1.2
 mmh3==5.1.0
 monotonic==1.6
 mpmath==1.3.0
 narwhals==1.31.0
-networkx
 numpy==1.26.4
 oauthlib==3.2.2
-onnxruntime
 openai==1.65.4
 opentelemetry-api==1.30.0
 opentelemetry-exporter-otlp-proto-common==1.30.0
 opentelemetry-exporter-otlp-proto-grpc==1.30.0
@@ -85,23 +111,32 @@ packaging==24.2
 pandas==2.2.3
 pillow==11.1.0
 posthog==3.18.1
 proto-plus==1.26.0
 protobuf==5.29.3
 pyarrow==19.0.1
 pyasn1==0.6.1
 pyasn1_modules==0.4.1
 pydantic==2.10.6
 pydantic_core==2.27.2
 pydeck==0.9.1
 Pygments==2.19.1
 pymongo==4.11.2
 pyparsing==3.2.1
 PyPika==0.48.9
 pyproject_hooks==1.2.0
 python-dateutil==2.9.0.post0
 python-dotenv==1.0.1
 pytz==2025.1
 PyYAML==6.0.2
 referencing==0.36.2
 regex==2024.11.6
 requests==2.32.3
@@ -112,8 +147,8 @@ rpds-py==0.23.1
 rsa==4.9
 safetensors==0.5.3
 scikit-learn==1.6.1
-scipy
-sentence-transformers==3.4.1
 setuptools==75.8.2
 shellingham==1.5.4
 six==1.17.0
@@ -124,6 +159,7 @@ SQLAlchemy==2.0.39
 starlette==0.46.0
 streamlit==1.43.2
 sympy==1.13.1
 tenacity==9.0.0
 threadpoolctl==3.5.0
 tiktoken==0.9.0
@@ -132,18 +168,24 @@ toml==0.10.2
 torch==2.6.0
 tornado==6.4.2
 tqdm==4.67.1
-transformers==4.49.0
 typer==0.15.2
 typing_extensions==4.12.2
 tzdata==2025.1
 uritemplate==4.1.1
 urllib3==2.3.0
 uvicorn==0.34.0
 uvloop==0.21.0
 watchdog==6.0.0
 watchfiles==1.0.4
 websocket-client==1.8.0
 websockets==15.0.1
 wrapt==1.17.2
 zipp==3.21.0
 zstandard==0.23.0

+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.11.14
+aiosignal==1.3.2
 altair==5.5.0
 annotated-types==0.7.0
 anyio==4.8.0
+arrow==1.3.0
 asgiref==3.8.1
 attrs==25.3.0
 backoff==2.2.1
 build==1.2.2.post1
 cachetools==5.5.2
 certifi==2025.1.31
+cffi==1.17.1
+chardet==5.2.0
 charset-normalizer==3.4.1
 chroma-hnswlib==0.7.6
 chromadb==0.6.3
 click==8.1.8
 coloredlogs==15.0.1
+cryptography==44.0.2
+dataclasses-json==0.6.7
 Deprecated==1.2.18
 distro==1.9.0
 dnspython==2.7.0
 durationpy==0.9
+emoji==2.14.1
+eval_type_backport==0.2.2
 fastapi==0.115.11
 filelock==3.17.0
+filetype==1.2.0
 flatbuffers==25.2.10
+frozenlist==1.5.0
 fsspec==2025.2.0
 gitdb==4.0.12
 GitPython==3.1.44
 google-auth-httplib2==0.2.0
 google-auth-oauthlib==1.2.1
 googleapis-common-protos==1.69.0
+grpcio==1.71.0
 h11==0.14.0
+html5lib==1.1
 httpcore==1.0.7
 httplib2==0.22.0
 httptools==0.6.4
 httpx==0.28.1
+httpx-sse==0.4.0
+huggingface-hub==0.29.3
 humanfriendly==10.0
+ics==0.7.2
 idna==3.10
 importlib_metadata==8.5.0
 importlib_resources==6.5.2
 jsonschema==4.23.0
 jsonschema-specifications==2024.10.1
 kubernetes==32.0.1
+langchain==0.3.21
 langchain-chroma==0.2.2
+langchain-community==0.3.20
+langchain-core==0.3.48
 langchain-mongodb==0.5.0
 langchain-openai==0.3.7
+langchain-text-splitters==0.3.7
+langdetect==1.0.9
 langsmith==0.3.11
+lxml==5.3.1
 markdown-it-py==3.0.0
 MarkupSafe==3.0.2
+marshmallow==3.26.1
 mdurl==0.1.2
 mmh3==5.1.0
 monotonic==1.6
 mpmath==1.3.0
+multidict==6.2.0
+mypy-extensions==1.0.0
 narwhals==1.31.0
+nest-asyncio==1.6.0
+networkx==3.4.2
+nltk==3.9.1
 numpy==1.26.4
 oauthlib==3.2.2
+olefile==0.47
+onnxruntime==1.21.0
 openai==1.65.4
+openpyxl==3.1.5
 opentelemetry-api==1.30.0
 opentelemetry-exporter-otlp-proto-common==1.30.0
 opentelemetry-exporter-otlp-proto-grpc==1.30.0
 pandas==2.2.3
 pillow==11.1.0
 posthog==3.18.1
+propcache==0.3.1
 proto-plus==1.26.0
 protobuf==5.29.3
+psutil==7.0.0
 pyarrow==19.0.1
 pyasn1==0.6.1
 pyasn1_modules==0.4.1
+pycparser==2.22
 pydantic==2.10.6
+pydantic-settings==2.8.1
 pydantic_core==2.27.2
 pydeck==0.9.1
 Pygments==2.19.1
 pymongo==4.11.2
 pyparsing==3.2.1
+pypdf==5.4.0
 PyPika==0.48.9
 pyproject_hooks==1.2.0
 python-dateutil==2.9.0.post0
 python-dotenv==1.0.1
+python-iso639==2025.2.18
+python-magic==0.4.27
+python-oxmsg==0.0.2
 pytz==2025.1
 PyYAML==6.0.2
+RapidFuzz==3.12.2
 referencing==0.36.2
 regex==2024.11.6
 requests==2.32.3
 rsa==4.9
 safetensors==0.5.3
 scikit-learn==1.6.1
+scipy==1.15.2
+sentence-transformers==4.0.1
 setuptools==75.8.2
 shellingham==1.5.4
 six==1.17.0
 starlette==0.46.0
 streamlit==1.43.2
 sympy==1.13.1
+TatSu==5.13.1
 tenacity==9.0.0
 threadpoolctl==3.5.0
 tiktoken==0.9.0
 torch==2.6.0
 tornado==6.4.2
 tqdm==4.67.1
+transformers==4.50.3
 typer==0.15.2
+typing-inspect==0.9.0
+typing-inspection==0.4.0
 typing_extensions==4.12.2
 tzdata==2025.1
+unstructured==0.17.2
+unstructured-client==0.32.0
 uritemplate==4.1.1
 urllib3==2.3.0
 uvicorn==0.34.0
 uvloop==0.21.0
 watchdog==6.0.0
 watchfiles==1.0.4
+webencodings==0.5.1
 websocket-client==1.8.0
 websockets==15.0.1
 wrapt==1.17.2
+yarl==1.18.3
 zipp==3.21.0
 zstandard==0.23.0