# LISA-demo / embeddings.py
# Commit 1a20a59 (Kadi-IAM): Clean code and add readme
"""
Load embedding models from huggingface.
"""
import torch
from langchain.embeddings import HuggingFaceEmbeddings
def get_hf_embeddings(model_name=None):
    """Return a HuggingFace embedding model wrapped for langchain.

    Args:
        model_name: HuggingFace hub id of the embedding model; when None,
            falls back to "BAAI/bge-large-en-v1.5".

    Returns:
        A HuggingFaceEmbeddings instance for the chosen model.
    """
    # Alternative candidates (ref: https://huggingface.co./spaces/mteb/leaderboard,
    # https://huggingface.co./maidalun1020/bce-embedding-base_v1):
    #   "BAAI/bge-m3"                              (good, though large and slow)
    #   "BAAI/bge-base-en-v1.5"                    (also good)
    #   "sentence-transformers/all-mpnet-base-v2"
    #   "maidalun1020/bce-embedding-base_v1"
    #   "intfloat/multilingual-e5-large"
    chosen = "BAAI/bge-large-en-v1.5" if model_name is None else model_name
    return HuggingFaceEmbeddings(model_name=chosen)
def get_jinaai_embeddings(
    model_name="jinaai/jina-embeddings-v2-base-en", device="auto"
):
    """Get a jina.ai embedding model wrapped in HuggingFaceEmbeddings.

    Args:
        model_name: HuggingFace hub id of the jina embedding model.
        device: "cpu", "cuda", or "auto" to select cuda when available.

    Returns:
        A HuggingFaceEmbeddings instance configured for the given device.
    """
    if device == "auto":
        device = "cuda" if torch.cuda.is_available() else "cpu"
    # jina embedding models ship custom modeling code, so loading them needs
    # trust_remote_code=True passed through to the underlying loader.
    # Ref: https://github.com/langchain-ai/langchain/issues/6080
    # NOTE: the previous pre-load via transformers.AutoModel was removed: its
    # result was never used and the original author flagged it as erroring.
    model_kwargs = {"device": device, "trust_remote_code": True}
    embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
    )
    return embeddings