joey1101 committed on
Commit
7ed7879
·
verified ·
1 Parent(s): 38823b1

Create LLM_low_bit_optimize.py

Browse files
Files changed (1) hide show
  1. LLM_low_bit_optimize.py +9 -0
LLM_low_bit_optimize.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from ipex_llm.transformers import AutoModelForCausalLM
2
+ from transformers import LlamaTokenizer
3
+
4
+
5
+ llm = AutoModelForCausalLM.from_pretrained("checkpoints\\Llama-2-7b-chat-hf",load_in_low_bit="sym_int4")
6
+ llm.save_low_bit("checkpoints\\Llama-2-7b-chat-hf-INT4")
7
+
8
+ tokenizer = LlamaTokenizer.from_pretrained("checkpoints\\Llama-2-7b-chat-hf\\")
9
+ tokenizer.save_pretrained("checkpoints\\Llama-2-7b-chat-hf-INT4")