from ipex_llm.transformers import AutoModelForCausalLM
from transformers import LlamaTokenizer


# Load the Llama 2 checkpoint and quantize its weights to symmetric INT4 on the fly,
# then save the low-bit model so later runs can skip the conversion step.
llm = AutoModelForCausalLM.from_pretrained(
    "checkpoints\\Llama-2-7b-chat-hf", load_in_low_bit="sym_int4"
)
llm.save_low_bit("checkpoints\\Llama-2-7b-chat-hf-INT4")

# Save the tokenizer alongside the low-bit weights so the INT4 folder is self-contained.
tokenizer = LlamaTokenizer.from_pretrained("checkpoints\\Llama-2-7b-chat-hf")
tokenizer.save_pretrained("checkpoints\\Llama-2-7b-chat-hf-INT4")
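
# A minimal sketch of reloading the saved INT4 checkpoint as a sanity check; it assumes
# ipex_llm's AutoModelForCausalLM.load_low_bit API and uses a made-up example prompt,
# so adjust the path and generation settings to your setup.
reloaded = AutoModelForCausalLM.load_low_bit("checkpoints\\Llama-2-7b-chat-hf-INT4")
reloaded_tokenizer = LlamaTokenizer.from_pretrained("checkpoints\\Llama-2-7b-chat-hf-INT4")

inputs = reloaded_tokenizer("What is AI?", return_tensors="pt")
output = reloaded.generate(inputs.input_ids, max_new_tokens=32)
print(reloaded_tokenizer.decode(output[0], skip_special_tokens=True))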