from ctransformers import AutoModelForCausalLM

# ctransformers selects the device through gpu_layers rather than a
# torch-style device handle, so no explicit torch device object is needed.

# Set gpu_layers to the number of layers to offload to GPU.
# Set to 0 if no GPU acceleration is available on your system.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7b-Chat-GGUF",
    model_file="llama-2-7b-chat.Q4_K_M.gguf",
    model_type="llama",
    gpu_layers=0,
)
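# Hypothetical GPU variant (not in the original script): with a CUDA-capable
# GPU, offloading layers speeds up inference. The layer count below is an
# illustrative assumption and depends on available VRAM.
# llm = AutoModelForCausalLM.from_pretrained(
#     "TheBloke/Llama-2-7b-Chat-GGUF",
#     model_file="llama-2-7b-chat.Q4_K_M.gguf",
#     model_type="llama",
#     gpu_layers=50,
# )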


def generate_answer(query, sample_num):
    """Generate sample_num independent completions for the same query."""
    results = []
    for _ in range(sample_num):
        results.append(llm(query))
    return results
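

# Minimal usage sketch; the query string and sample count below are
# illustrative placeholders, not part of the original script.
if __name__ == "__main__":
    answers = generate_answer("What is a GGUF model file?", 3)
    for i, answer in enumerate(answers, start=1):
        print(f"Sample {i}: {answer}")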