# app/models/llm/__init__.py
"""Module for OpenAI model and embeddings."""
# import os
from typing import List
# import onnxruntime as ort
from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
# from langchain_huggingface import HuggingFacePipeline
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# from huggingface_hub import hf_hub_download
class GPTModel(AzureChatOpenAI):
    """
    Azure-hosted chat model that extends AzureChatOpenAI with fixed settings.

    Pins the deployment to "gpt-4o", enables token streaming, and sets
    temperature to 0 for deterministic output. Endpoint and credentials are
    expected to be supplied via the environment, as AzureChatOpenAI does by
    default.
    Methods:
        __init__():
            Initializes the model with the fixed deployment settings above.
    """
    def __init__(self):
        # deployment_name selects the Azure deployment; streaming=True emits
        # tokens incrementally; temperature=0 makes responses deterministic.
        super().__init__(
            deployment_name="gpt-4o",
            streaming=True, temperature=0)
class GPTEmbeddings(AzureOpenAIEmbeddings):
    """
    Thin marker subclass of AzureOpenAIEmbeddings.

    Adds no behavior of its own; it exists so the application can refer to a
    single project-local embeddings type. All configuration (endpoint,
    deployment, credentials) and every method are inherited unchanged from
    AzureOpenAIEmbeddings.
    """
# class Phi4MiniONNXLLM:
# """
# A class for interfacing with a pre-trained ONNX model for inference.
# Attributes:
# session (onnxruntime.InferenceSession): The ONNX runtime inference session for the model.
# input_name (str): The name of the input node in the ONNX model.
# output_name (str): The name of the output node in the ONNX model.
# Methods:
# __init__(model_path):
# Initializes the Phi4MiniONNXLLM instance by loading the ONNX model from specified path.
# __call__(input_ids):
# Performs inference on the given input data and returns the model's output.
# """
# def __init__(self, repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
# self.repo_id = repo_id
# model_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{onnx_file}")
# weights_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{weights_file}")
# self.session = ort.InferenceSession(model_path)
# # Verify both files exist
# print(f"Model path: {model_path}, Exists: {os.path.exists(model_path)}")
# print(f"Weights path: {weights_path}, Exists: {os.path.exists(weights_path)}")
# self.input_name = self.session.get_inputs()[0].name
# self.output_name = self.session.get_outputs()[0].name
# def __call__(self, input_text):
# # Assuming input_ids is a tensor or numpy array
# tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct-onnx")
# inputs = tokenizer(input_text, return_tensors="pt")
# input_feed = {
# self.input_name: inputs["input_ids"].numpy(),
# "attention_mask": inputs["attention_mask"].numpy(),
# # Add past_key_values if applicable
# }
# outputs = self.session.run([self.output_name], input_feed)
# return outputs
# class HuggingfaceModel(HuggingFacePipeline):
# """
# HuggingfaceModel is a wrapper class for the Hugging Face text-generation pipeline.
# Attributes:
# name (str): The name or path of the pre-trained model to load from Hugging Face.
# max_tokens (int): The maximum number of new tokens to generate in the text output.
# Defaults to 200.
# Methods:
# __init__(name, max_tokens=200):
# Initializes the HuggingfaceModel with the specified model name and maximum token limit.
# """
# def __init__(self, name, max_tokens=500):
# super().__init__(pipeline=pipeline(
# "text-generation",
# model=AutoModelForCausalLM.from_pretrained(name),
# tokenizer=AutoTokenizer.from_pretrained(name),
# max_new_tokens=max_tokens
# )
# )
class EmbeddingsModel(Embeddings):
    """
    LangChain-compatible embeddings backed by a SentenceTransformer.

    Attributes:
        model (SentenceTransformer): Backing sentence-transformer used to
            encode text into dense vectors.
    """
    def __init__(self, model_name: str):
        """
        Load the underlying SentenceTransformer.

        Args:
            model_name (str): Model name or path handed straight to
                ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, documents: List[str]) -> List[List[float]]:
        """
        Encode each document into a dense vector.

        Args:
            documents (List[str]): Texts to embed.

        Returns:
            List[List[float]]: One embedding vector per input document.
        """
        vectors = self.model.encode(documents)
        return vectors.tolist()

    def embed_query(self, query: str) -> List[float]:
        """
        Encode a single query string into a dense vector.

        Args:
            query (str): Query text to embed.

        Returns:
            List[float]: The embedding vector for the query.
        """
        # Encode as a one-element batch, then unwrap the single result row.
        batch = self.model.encode([query])
        return batch.tolist()[0]
# model_name = "microsoft/phi-1_5"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)
# phi4_llm = HuggingFacePipeline(pipeline=pipe)
# tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2", pad_token_id=50256)
# model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
# pipe = pipeline(
# "text-generation", model=model, tokenizer=tokenizer,
# max_new_tokens=10, truncation=True, # Truncate input sequences
# )
# phi4_llm = HuggingFacePipeline(pipeline=pipe)