"""Module for OpenAI model and embeddings."""
# import os
from typing import List
# import onnxruntime as ort
from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
# from langchain_huggingface import HuggingFacePipeline
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# from huggingface_hub import hf_hub_download
class GPTModel(AzureChatOpenAI):
    """
    GPTModel class that extends AzureChatOpenAI.
    This class initializes a GPT model with fixed deployment settings:
    the "gpt-4o" deployment, streaming enabled, and temperature 0.
    Methods:
        __init__():
            Initializes the GPTModel with the preset deployment settings.
    """
def __init__(self):
super().__init__(
deployment_name="gpt-4o",
streaming=True, temperature=0)
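# Usage sketch (illustrative, not executed on import): GPTModel assumes the
# standard Azure OpenAI environment variables are already set, e.g.
# AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_KEY, and OPENAI_API_VERSION.
#
#     llm = GPTModel()
#     response = llm.invoke("Summarize LangChain in one sentence.")
#     print(response.content)  # AIMessage.content holds the reply text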
class GPTEmbeddings(AzureOpenAIEmbeddings):
"""
GPTEmbeddings class that extends AzureOpenAIEmbeddings.
    This class handles embeddings using the embedding models provided by Azure OpenAI services.
Attributes:
Inherits all attributes from AzureOpenAIEmbeddings.
Methods:
Inherits all methods from AzureOpenAIEmbeddings.
"""
# class Phi4MiniONNXLLM:
# """
# A class for interfacing with a pre-trained ONNX model for inference.
# Attributes:
# session (onnxruntime.InferenceSession): The ONNX runtime inference session for the model.
# input_name (str): The name of the input node in the ONNX model.
# output_name (str): The name of the output node in the ONNX model.
# Methods:
#         __init__(repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
#             Initializes the Phi4MiniONNXLLM instance by downloading the ONNX graph and weights from the Hugging Face Hub.
#         __call__(input_text):
#             Tokenizes the input text, performs inference, and returns the model's raw output.
# """
# def __init__(self, repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
# self.repo_id = repo_id
# model_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{onnx_file}")
# weights_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{weights_file}")
# self.session = ort.InferenceSession(model_path)
# # Verify both files exist
# print(f"Model path: {model_path}, Exists: {os.path.exists(model_path)}")
# print(f"Weights path: {weights_path}, Exists: {os.path.exists(weights_path)}")
# self.input_name = self.session.get_inputs()[0].name
# self.output_name = self.session.get_outputs()[0].name
# def __call__(self, input_text):
#         # Tokenize the raw input text; note that loading the tokenizer on
#         # every call is wasteful, so caching it in __init__ would be better
# tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct-onnx")
# inputs = tokenizer(input_text, return_tensors="pt")
# input_feed = {
# self.input_name: inputs["input_ids"].numpy(),
# "attention_mask": inputs["attention_mask"].numpy(),
# # Add past_key_values if applicable
# }
# outputs = self.session.run([self.output_name], input_feed)
# return outputs
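# Usage sketch for the commented-out class above (the subfolder name is
# illustrative and would need to match an actual ONNX export on the Hub):
#
#     phi4 = Phi4MiniONNXLLM(
#         repo_id="microsoft/Phi-4-mini-instruct-onnx",
#         subfolder="cpu_and_mobile/cpu-int4-rtn-block-32",
#     )
#     outputs = phi4("What is the capital of France?")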
# class HuggingfaceModel(HuggingFacePipeline):
# """
# HuggingfaceModel is a wrapper class for the Hugging Face text-generation pipeline.
# Attributes:
# name (str): The name or path of the pre-trained model to load from Hugging Face.
#         max_tokens (int): The maximum number of new tokens to generate in the text output.
#             Defaults to 500.
#     Methods:
#         __init__(name, max_tokens=500):
#             Initializes the HuggingfaceModel with the specified model name and maximum token limit.
# """
# def __init__(self, name, max_tokens=500):
# super().__init__(pipeline=pipeline(
# "text-generation",
# model=AutoModelForCausalLM.from_pretrained(name),
# tokenizer=AutoTokenizer.from_pretrained(name),
# max_new_tokens=max_tokens
# )
# )
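# Usage sketch for the commented-out wrapper above (the model name is
# illustrative; any causal-LM checkpoint on the Hub should work):
#
#     hf_llm = HuggingfaceModel("microsoft/phi-1_5", max_tokens=200)
#     print(hf_llm.invoke("Explain list comprehensions in one line."))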
class EmbeddingsModel(Embeddings):
"""
A model for generating embeddings using SentenceTransformer.
Attributes:
model (SentenceTransformer): The SentenceTransformer model used for generating embeddings.
"""
def __init__(self, model_name: str):
"""
        Initializes the embeddings model with the specified SentenceTransformer model name.
Args:
model_name (str): The name of the model to be used for sentence transformation.
"""
self.model = SentenceTransformer(model_name)
def embed_documents(self, documents: List[str]) -> List[List[float]]:
"""
Embed a list of documents into a list of vectors.
Args:
documents (List[str]): A list of documents to be embedded.
Returns:
List[List[float]]: A list of vectors representing the embedded documents.
"""
return self.model.encode(documents).tolist()
def embed_query(self, query: str) -> List[float]:
"""
Embed a query string into a list of floats using the model's encoding.
Args:
query (str): The query string to be embedded.
Returns:
List[float]: The embedded representation of the query as a list of floats.
"""
return self.model.encode([query]).tolist()[0]
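# Usage sketch (illustrative; "all-MiniLM-L6-v2" is a commonly used
# SentenceTransformer checkpoint, shown here only as an example):
#
#     embedder = EmbeddingsModel("all-MiniLM-L6-v2")
#     doc_vectors = embedder.embed_documents(["first doc", "second doc"])
#     query_vector = embedder.embed_query("first")
#     print(len(doc_vectors), len(query_vector))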
# model_name = "microsoft/phi-1_5"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)
# phi4_llm = HuggingFacePipeline(pipeline=pipe)
# tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2", pad_token_id=50256)
# model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
# pipe = pipeline(
# "text-generation", model=model, tokenizer=tokenizer,
# max_new_tokens=10, truncation=True, # Truncate input sequences
# )
# phi4_llm = HuggingFacePipeline(pipeline=pipe)