File size: 6,131 Bytes
b5deaf1
af08824
ad04a72
af08824
ad04a72
 
b5deaf1
af08824
 
 
b5deaf1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e83b975
af08824
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e83b975
ad04a72
 
 
 
 
 
 
 
 
 
 
 
 
 
83747e9
ad04a72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e83b975
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
"""Module for OpenAI model and embeddings."""
# import os
from typing import List
# import onnxruntime as ort
from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
# from langchain_huggingface import HuggingFacePipeline
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# from huggingface_hub import hf_hub_download

class GPTModel(AzureChatOpenAI):
    """
    Azure OpenAI chat model preconfigured with fixed settings.

    A thin subclass of AzureChatOpenAI whose constructor takes no arguments
    and always forwards the same keyword arguments to the parent class.

    Fixed settings:
        deployment_name: "gpt-4o"
        streaming: True
        temperature: 0

    Note:
        No endpoint, API version or credentials are passed here; presumably
        AzureChatOpenAI resolves them from its own defaults or the
        environment -- confirm against the deployment configuration.

    Methods:
        __init__():
            Initializes the parent AzureChatOpenAI with the fixed settings above.
    """
    def __init__(self):
        # Collected in one mapping so the fixed configuration is visible at a glance.
        fixed_settings = {
            "deployment_name": "gpt-4o",
            "streaming": True,
            "temperature": 0,
        }
        super().__init__(**fixed_settings)

class GPTEmbeddings(AzureOpenAIEmbeddings):
    """
    Project-local name for the Azure OpenAI embeddings client.

    This subclass adds no attributes and overrides no methods; construction
    and every embedding call are handled entirely by AzureOpenAIEmbeddings.

    Attributes:
        None beyond those inherited from AzureOpenAIEmbeddings.

    Methods:
        None beyond those inherited from AzureOpenAIEmbeddings.
    """

# class Phi4MiniONNXLLM:
#     """
#     A class for interfacing with a pre-trained ONNX model for inference.

#     Attributes:
#         session (onnxruntime.InferenceSession): The ONNX runtime inference session for the model.
#         input_name (str): The name of the input node in the ONNX model.
#         output_name (str): The name of the output node in the ONNX model.

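#     Methods:
#         __init__(repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
#             Downloads the ONNX model and weights files from the Hugging Face Hub and
#             creates the inference session from the downloaded model file.

#         __call__(input_text):
#             Tokenizes the input text, runs inference and returns the model's output.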
#     """
#     def __init__(self, repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
#         self.repo_id = repo_id
#         model_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{onnx_file}")
#         weights_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{weights_file}")
#         self.session = ort.InferenceSession(model_path)
#         # Verify both files exist
#         print(f"Model path: {model_path}, Exists: {os.path.exists(model_path)}")
#         print(f"Weights path: {weights_path}, Exists: {os.path.exists(weights_path)}")
#         self.input_name = self.session.get_inputs()[0].name
#         self.output_name = self.session.get_outputs()[0].name

#     def __call__(self, input_text):
#         # Assuming input_ids is a tensor or numpy array
#         tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct-onnx")
#         inputs = tokenizer(input_text, return_tensors="pt")
#         input_feed = {
#             self.input_name: inputs["input_ids"].numpy(),
#             "attention_mask": inputs["attention_mask"].numpy(),
#             # Add past_key_values if applicable
#         }
#         outputs = self.session.run([self.output_name], input_feed)
#         return outputs

# class HuggingfaceModel(HuggingFacePipeline):
#     """
#     HuggingfaceModel is a wrapper class for the Hugging Face text-generation pipeline.

#     Attributes:
#         name (str): The name or path of the pre-trained model to load from Hugging Face.
#         max_tokens (int): The maximum number of new tokens to generate in the text output.
#         Defaults to 500.

#     Methods:
#         __init__(name, max_tokens=500):
#             Initializes the HuggingfaceModel with the specified model name and maximum token limit.
#     """
#     def __init__(self, name, max_tokens=500):
#         super().__init__(pipeline=pipeline(
#             "text-generation",
#             model=AutoModelForCausalLM.from_pretrained(name),
#             tokenizer=AutoTokenizer.from_pretrained(name),
#             max_new_tokens=max_tokens
#             )
#         )

class EmbeddingsModel(Embeddings):
    """
    Embeddings implementation backed by a SentenceTransformer encoder.

    Attributes:
        model (SentenceTransformer): Encoder created in ``__init__`` and used
            by both embedding methods.
    """
    def __init__(self, model_name: str):
        """
        Create the underlying SentenceTransformer encoder.

        Args:
            model_name (str): Identifier passed unchanged to ``SentenceTransformer``.
        """
        encoder = SentenceTransformer(model_name)
        self.model = encoder

    def embed_documents(self, documents: List[str]) -> List[List[float]]:
        """
        Encode several documents in a single call to the encoder.

        Args:
            documents (List[str]): The texts to encode.

        Returns:
            List[List[float]]: The encoder output converted with ``tolist()``;
            presumably one vector per input document, in input order.
        """
        encoded = self.model.encode(documents)
        return encoded.tolist()

    def embed_query(self, query: str) -> List[float]:
        """
        Encode a single query string.

        The query is wrapped in a one-element list before encoding, and the
        first (only) row of the converted result is returned.

        Args:
            query (str): The text to encode.

        Returns:
            List[float]: The vector produced for ``query``.
        """
        single_item_batch = [query]
        rows = self.model.encode(single_item_batch).tolist()
        return rows[0]

# model_name = "microsoft/phi-1_5"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)

# phi4_llm = HuggingFacePipeline(pipeline=pipe)

# tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2", pad_token_id=50256)
# model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
# pipe = pipeline(
#     "text-generation", model=model, tokenizer=tokenizer,
#       max_new_tokens=10, truncation=True,  # Truncate input sequences
# )
# phi4_llm = HuggingFacePipeline(pipeline=pipe)