"""
Load LLMs from huggingface, Groq, etc.
"""
from transformers import (
# AutoModelForCausalLM,
AutoTokenizer,
pipeline,
)
from langchain.llms import HuggingFacePipeline
from langchain_groq import ChatGroq
from langchain.llms import HuggingFaceTextGenInference
# from langchain.chat_models import ChatOpenAI # oai model


def get_llm_hf_online(inference_api_url=""):
    """Get an LLM served via the Hugging Face Inference API."""
    if not inference_api_url:  # fall back to the default API URL
        inference_api_url = (
            "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
        )
    llm = HuggingFaceTextGenInference(
        verbose=True,  # provide detailed logs of the operation
        max_new_tokens=1024,  # maximum number of tokens that can be generated
        top_p=0.95,  # nucleus-sampling threshold controlling randomness
        temperature=0.1,  # low temperature for more deterministic output
        inference_server_url=inference_api_url,
        timeout=10,  # timeout (seconds) for the connection to the server
    )
    return llm


def get_llm_hf_local(model_path):
    """Get a local LLM from a Hugging Face checkpoint on disk."""
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=2048,  # better setting?
        model_kwargs={"temperature": 0.1},  # better setting?
    )
    llm = HuggingFacePipeline(pipeline=pipe)
    return llm


def get_groq_chat(model_name="llama-3.1-70b-versatile"):
    """Get a chat LLM from Groq."""
    llm = ChatGroq(temperature=0, model_name=model_name)
    return llm
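

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module: the Groq path assumes
    # a GROQ_API_KEY environment variable is set, the online Hugging Face path
    # assumes network access, and the local path assumes a checkpoint on disk.
    chat = get_groq_chat()
    print(chat.invoke("Say hello in one short sentence.").content)

    # llm = get_llm_hf_online()
    # print(llm.invoke("Say hello in one short sentence."))

    # llm = get_llm_hf_local("/path/to/local/checkpoint")  # hypothetical path
    # print(llm.invoke("Say hello in one short sentence."))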