This repository contains the model checkpoint of GLM-4-32B-0414-GPTQ-4bits.

- Base model: GLM-4-32B-0414
- Quantization method: GPTQ
- Quantization toolkit: https://github.com/modelcloud/gptqmodel
## EoRA Method Dataset

The helpers below build the calibration sets used to derive the EoRA low-rank correction for the quantized weights; `construct_mmlu` matches the `--eora_dataset mmlu` option used in the quantization command further down.
```python
from datasets import load_dataset


def question_answering_format(question, answer):
    return f"Question: {question}\nAnswer: {answer}"


def multiple_choices_question_answering_format(question, choices, answer):
    return f"{question.strip()}\nA. {choices[0]}\nB. {choices[1]}\nC. {choices[2]}\nD. {choices[3]}\nAnswer: {answer}"


## An example of using C4 for constructing the EoRA calibration set
def construct_c4():
    calibration_dataset = load_dataset(
        "/mnt/ceph/develop/jiawei/code_dataset/c4",  # local copy of the C4 dataset
        data_files="en.noblocklist/c4-train.00001-of-01024.json.gz",
        split="train", download_mode="force_redownload"
    ).select(range(1024))["text"]
    return calibration_dataset


## An example of using ARC for constructing the EoRA calibration set
def construct_ARC():
    nsamples = 1024
    arc_easy_calibration_dataset = load_dataset('ai2_arc', 'ARC-Easy', split='train').select(range(nsamples))
    arc_challenge_calibration_dataset = load_dataset('ai2_arc', 'ARC-Challenge', split='train').select(range(nsamples))
    dataset = []

    for example in arc_easy_calibration_dataset:
        answer = example['choices']['text'][example['choices']['label'].index(example['answerKey'])]
        question = example['question']
        dataset.append(question_answering_format(question=question, answer=answer))

    for example in arc_challenge_calibration_dataset:
        answer = example['choices']['text'][example['choices']['label'].index(example['answerKey'])]
        question = example['question']
        dataset.append(question_answering_format(question=question, answer=answer))

    ## we recommend also including some examples from C4 to avoid overfitting to the downstream data
    c4_dataset = load_dataset(
        "allenai/c4",
        data_files="en/c4-train.00001-of-01024.json.gz",
        split="train"
    ).select(range(nsamples))["text"]

    return dataset + c4_dataset


def multiple_identity_format(instruction, input_q, output):
    return f"{instruction.strip()} {input_q}\n {output}"


## An example of using MMLU plus an identity dataset for constructing the EoRA calibration set
def construct_mmlu():
    mmlu_calibration_dataset = load_dataset(
        '/mnt/ceph/develop/jiawei/code_dataset/mmlu',  # local copy of the MMLU dataset
        'all', split='validation'
    )
    dataset = []
    for example in mmlu_calibration_dataset:
        question = example['question']
        choices = example['choices']
        answer = ['A', 'B', 'C', 'D'][example['answer']]
        dataset.append(multiple_choices_question_answering_format(question, choices, answer))

    identity_dataset = load_dataset(
        "json",
        data_files="/mnt/ceph/develop/jiawei/GPTQModel/examples/eora/identity.json",
        split="train"
    )
    for example in identity_dataset:
        instruction = example['instruction']
        input_q = example['input']
        output = example['output']
        dataset.append(multiple_identity_format(instruction, input_q, output))

    ## we recommend also including some examples from C4 to avoid overfitting to the downstream data
    c4_dataset = load_dataset(
        "/mnt/ceph/develop/jiawei/code_dataset/c4",  # local copy of the C4 dataset
        data_files="en.noblocklist/c4-train.00001-of-01024.json.gz",
        split="train"
    ).select(range(1024))["text"]

    return dataset + c4_dataset
```
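The constructors above return plain lists of strings. For context, here is a minimal sketch of how such a list is consumed during quantization (the `eora_generation.py` script below wires this up for you; class and method names follow the modelcloud/gptqmodel README, and `group_size` here is an illustrative choice, not necessarily what this checkpoint used):

```python
from gptqmodel import GPTQModel, QuantizeConfig

# Sketch only: assumes the construct_mmlu() helper defined above is in scope.
quant_config = QuantizeConfig(bits=4, group_size=128)  # 4-bit GPTQ; group_size is illustrative
model = GPTQModel.load("THUDM/GLM-4-32B-0414", quant_config)
model.quantize(construct_mmlu(), batch_size=1)  # calibration texts drive the GPTQ error minimization
model.save("GLM-4-32B-0414-GPTQ-4bits")
```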
- Quantization:

  ```bash
  python examples/eora/eora_generation.py THUDM/GLM-4-32B-0414 --bits 4 --quant_save_path glide-the/GLM-4-32B-0414-GPTQ-4bits --eora_dataset mmlu --eora_save_path glide-the/GLM-4-32B-0414-GPTQ-4bits-eora_rank64_c4 --eora_rank 64
  ```
- Inference:

  ```bash
  python examples/eora/eora_load_and_inference.py --quantized_model glide-the/GLM-4-32B-0414-GPTQ-4bits
  ```
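To load the checkpoint directly from Python with GPTQModel instead of the example script, a minimal sketch (the `GPTQModel.load` entry point is from the modelcloud/gptqmodel README; verify the exact API against your installed version):

```python
from gptqmodel import GPTQModel
from transformers import AutoTokenizer

model_id = "glide-the/GLM-4-32B-0414-GPTQ-4bits"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = GPTQModel.load(model_id)  # loads the quantized weights with GPTQ kernels

inputs = tokenizer("Uncovering deep insights begins with", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=64)[0]))
```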
## Usage with Transformers
```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("glide-the/GLM-4-32B-0414-GPTQ-4bits")
quantized_model = AutoModelForCausalLM.from_pretrained(
    "glide-the/GLM-4-32B-0414-GPTQ-4bits",
    device_map="auto",  # GPTQ kernels run on GPU; let transformers place the layers
)

prompt = """Extract (company, founder) pairs from the following text:
Example 1:
Text: "Jack Ma founded Alibaba."
Output: [("Alibaba", "Jack Ma")]
Example 2:
Text: "Bill Gates is a co-founder of Microsoft."
Output: [("Microsoft", "Bill Gates")]
To extract:
Text: "Steve Jobs founded Apple."
Output:"""

inputs = tokenizer(prompt, return_tensors="pt").to(quantized_model.device)
outputs = quantized_model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0]))
```