"""Save a low-bit copy of a local Llama-2 chat checkpoint plus its tokenizer.

The model is loaded from ``SOURCE_DIR`` with ``load_in_low_bit="sym_int4"``
and written to ``TARGET_DIR`` via ``save_low_bit``; the tokenizer is then
loaded from the same source folder and saved next to the converted weights
so the output folder can be used on its own.
"""
import os

from ipex_llm.transformers import AutoModelForCausalLM
from transformers import LlamaTokenizer

# Build the paths with os.path.join instead of hard-coding "\\" so the script
# resolves the same relative folders on Windows and on POSIX systems.
SOURCE_DIR = os.path.join("checkpoints", "Llama-2-7b-chat-hf")
TARGET_DIR = os.path.join("checkpoints", "Llama-2-7b-chat-hf-INT4")

# Load the original checkpoint in the requested low-bit format and persist it.
llm = AutoModelForCausalLM.from_pretrained(SOURCE_DIR, load_in_low_bit="sym_int4")
llm.save_low_bit(TARGET_DIR)

# Copy the tokenizer files into the same output folder as the converted model.
tokenizer = LlamaTokenizer.from_pretrained(SOURCE_DIR)
tokenizer.save_pretrained(TARGET_DIR)