"""
Load LLMs from huggingface, Groq, etc.
"""
from transformers import (
# AutoModelForCausalLM,
AutoTokenizer,
pipeline,
)
from langchain.llms import HuggingFacePipeline
from langchain_groq import ChatGroq
from langchain.llms import HuggingFaceTextGenInference
# from langchain.chat_models import ChatOpenAI # oai model


def get_llm_hf_online(inference_api_url=""):
    """Get an LLM served via the Hugging Face Inference API."""
    if not inference_api_url:  # fall back to the default API URL
        inference_api_url = (
            "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
        )
    llm = HuggingFaceTextGenInference(
        verbose=True,  # provide detailed logs of the operation
        max_new_tokens=1024,  # maximum number of tokens that can be generated
        top_p=0.95,  # nucleus-sampling threshold controlling randomness
        temperature=0.1,  # low temperature for more deterministic output
        inference_server_url=inference_api_url,
        timeout=10,  # timeout (seconds) for the connection to the server
    )
    return llm


def get_llm_hf_local(model_path):
    """Get a local LLM from a Hugging Face checkpoint on disk."""
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=2048,  # better setting?
        model_kwargs={"temperature": 0.1},  # better setting?
    )
    llm = HuggingFacePipeline(pipeline=pipe)
    return llm


def get_groq_chat(model_name="llama-3.1-70b-versatile"):
    """Get a chat LLM from Groq."""
    llm = ChatGroq(temperature=0, model_name=model_name)
    return llm
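

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module: the Groq path assumes
    # a GROQ_API_KEY environment variable is set, the online Hugging Face path
    # assumes network access, and the local path assumes a checkpoint on disk.
    chat = get_groq_chat()
    print(chat.invoke("Say hello in one short sentence.").content)

    # llm = get_llm_hf_online()
    # print(llm.invoke("Say hello in one short sentence."))

    # llm = get_llm_hf_local("/path/to/local/checkpoint")  # hypothetical path
    # print(llm.invoke("Say hello in one short sentence."))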