"""Module for OpenAI model and embeddings.""" | |
# import os | |
from typing import List | |
# import onnxruntime as ort | |
from langchain.embeddings.base import Embeddings | |
from sentence_transformers import SentenceTransformer | |
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings | |
# from langchain_huggingface import HuggingFacePipeline | |
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
# from huggingface_hub import hf_hub_download | |


class GPTModel(AzureChatOpenAI):
    """
    GPTModel class that extends AzureChatOpenAI.

    This class initializes a GPT model pinned to a specific Azure deployment,
    with streaming enabled and temperature fixed at 0 for deterministic output.
    """

    def __init__(self):
        super().__init__(
            deployment_name="gpt-4o",
            streaming=True,
            temperature=0,
        )
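
# A minimal usage sketch (an assumption, not part of this module's API):
# AzureChatOpenAI reads its connection settings from the standard environment
# variables (AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_KEY, OPENAI_API_VERSION),
# and a deployment named "gpt-4o" must exist in that Azure resource.
#
#     llm = GPTModel()
#     for chunk in llm.stream("Say hello in one sentence."):
#         print(chunk.content, end="", flush=True)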


class GPTEmbeddings(AzureOpenAIEmbeddings):
    """
    GPTEmbeddings class that extends AzureOpenAIEmbeddings.

    This class handles embeddings using the embedding models provided by the
    Azure OpenAI service. It inherits all attributes and methods from
    AzureOpenAIEmbeddings unchanged.
    """


# class Phi4MiniONNXLLM:
#     """
#     A class for interfacing with a pre-trained ONNX model for inference.
#
#     Attributes:
#         session (onnxruntime.InferenceSession): The ONNX runtime inference session for the model.
#         input_name (str): The name of the input node in the ONNX model.
#         output_name (str): The name of the output node in the ONNX model.
#
#     Methods:
#         __init__(repo_id, subfolder, onnx_file, weights_file):
#             Downloads the ONNX model and its external weights file from the
#             Hugging Face Hub and initializes the inference session.
#         __call__(input_text):
#             Tokenizes the input text, runs inference, and returns the model's output.
#     """
#     def __init__(self, repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
#         self.repo_id = repo_id
#         model_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{onnx_file}")
#         weights_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{weights_file}")
#         self.session = ort.InferenceSession(model_path)
#         # Verify both files exist
#         print(f"Model path: {model_path}, Exists: {os.path.exists(model_path)}")
#         print(f"Weights path: {weights_path}, Exists: {os.path.exists(weights_path)}")
#         self.input_name = self.session.get_inputs()[0].name
#         self.output_name = self.session.get_outputs()[0].name
#
#     def __call__(self, input_text):
#         # Tokenize the raw text. Loading the tokenizer on every call is
#         # wasteful; cache it in __init__ if this class is re-enabled.
#         tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct-onnx")
#         inputs = tokenizer(input_text, return_tensors="pt")
#         input_feed = {
#             self.input_name: inputs["input_ids"].numpy(),
#             "attention_mask": inputs["attention_mask"].numpy(),
#             # Add past_key_values if applicable
#         }
#         outputs = self.session.run([self.output_name], input_feed)
#         return outputs
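#
# If this class is re-enabled, a usage sketch could look like the following
# (the subfolder name is hypothetical and must match an ONNX variant actually
# published in the microsoft/Phi-4-mini-instruct-onnx repository):
#
#     phi4 = Phi4MiniONNXLLM(
#         "microsoft/Phi-4-mini-instruct-onnx",
#         "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4",
#     )
#     logits = phi4("What is the capital of France?")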


# class HuggingfaceModel(HuggingFacePipeline):
#     """
#     HuggingfaceModel is a wrapper class for the Hugging Face text-generation pipeline.
#
#     Attributes:
#         name (str): The name or path of the pre-trained model to load from Hugging Face.
#         max_tokens (int): The maximum number of new tokens to generate in the text output.
#             Defaults to 500.
#
#     Methods:
#         __init__(name, max_tokens=500):
#             Initializes the HuggingfaceModel with the specified model name and maximum token limit.
#     """
#     def __init__(self, name, max_tokens=500):
#         super().__init__(pipeline=pipeline(
#             "text-generation",
#             model=AutoModelForCausalLM.from_pretrained(name),
#             tokenizer=AutoTokenizer.from_pretrained(name),
#             max_new_tokens=max_tokens,
#         ))
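#
# If re-enabled, usage would mirror any LangChain LLM (the model name below is
# only an example):
#
#     hf_llm = HuggingfaceModel("microsoft/phi-1_5")
#     print(hf_llm.invoke("Explain embeddings in one sentence."))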


class EmbeddingsModel(Embeddings):
    """
    A LangChain-compatible embeddings model backed by SentenceTransformer.

    Attributes:
        model (SentenceTransformer): The SentenceTransformer model used for generating embeddings.
    """

    def __init__(self, model_name: str):
        """
        Initializes the embeddings model with the specified SentenceTransformer model name.

        Args:
            model_name (str): The name of the sentence-transformers model to load.
        """
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, documents: List[str]) -> List[List[float]]:
        """
        Embed a list of documents into a list of vectors.

        Args:
            documents (List[str]): A list of documents to be embedded.

        Returns:
            List[List[float]]: A list of vectors representing the embedded documents.
        """
        return self.model.encode(documents).tolist()

    def embed_query(self, query: str) -> List[float]:
        """
        Embed a query string into a list of floats using the model's encoding.

        Args:
            query (str): The query string to be embedded.

        Returns:
            List[float]: The embedded representation of the query as a list of floats.
        """
        return self.model.encode([query]).tolist()[0]
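

# A minimal usage sketch (the model name below is an example; any
# sentence-transformers checkpoint works):
#
#     embedder = EmbeddingsModel("all-MiniLM-L6-v2")
#     doc_vectors = embedder.embed_documents(["first doc", "second doc"])
#     query_vector = embedder.embed_query("a search query")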


# Scratch examples of building a local HuggingFacePipeline LLM, kept for reference:
# model_name = "microsoft/phi-1_5"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)
# phi4_llm = HuggingFacePipeline(pipeline=pipe)

# tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2", pad_token_id=50256)
# model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
# pipe = pipeline(
#     "text-generation", model=model, tokenizer=tokenizer,
#     max_new_tokens=10, truncation=True,  # Truncate input sequences
# )
# phi4_llm = HuggingFacePipeline(pipeline=pipe)