# Source snapshot: 405 bytes, commit 7ed7879
from pathlib import Path

from ipex_llm.transformers import AutoModelForCausalLM
from transformers import LlamaTokenizer
# Quantize Llama-2-7b-chat-hf to symmetric INT4 with ipex-llm and save the
# low-bit model together with its tokenizer so the output folder is
# self-contained.
#
# Paths are built with pathlib so the script works on any OS, not just
# Windows (the original used hard-coded "\\" separators, inconsistently —
# one path even carried a trailing backslash).
MODEL_PATH = Path("checkpoints") / "Llama-2-7b-chat-hf"       # original FP16 checkpoint
SAVE_PATH = Path("checkpoints") / "Llama-2-7b-chat-hf-INT4"   # quantized output dir

# load_in_low_bit="sym_int4" makes ipex-llm quantize the weights to
# symmetric 4-bit integers while loading.
llm = AutoModelForCausalLM.from_pretrained(str(MODEL_PATH), load_in_low_bit="sym_int4")
llm.save_low_bit(str(SAVE_PATH))

# Save the tokenizer next to the quantized weights so SAVE_PATH can be
# loaded standalone later.
tokenizer = LlamaTokenizer.from_pretrained(str(MODEL_PATH))
tokenizer.save_pretrained(str(SAVE_PATH))