This repository contains the model checkpoint of GLM-4-32B-0414-GPTQ-4bits.

- Base model: GLM-4-32B-0414
- Quantization method: GPTQ (4-bit)
- Quantization library: https://github.com/modelcloud/gptqmodel
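For reference, a minimal sketch of how a 4-bit GPTQ quantization runs through GPTQModel; the calibration slice, `group_size`, and save path below are illustrative assumptions, so check the repository above for the authoritative API:

```python
from datasets import load_dataset
from gptqmodel import GPTQModel, QuantizeConfig

# A small C4 slice as calibration data (public Hub mirror).
calibration_dataset = load_dataset(
    "allenai/c4",
    data_files="en/c4-train.00001-of-01024.json.gz",
    split="train",
).select(range(1024))["text"]

quant_config = QuantizeConfig(bits=4, group_size=128)  # 4-bit GPTQ; group_size is illustrative

model = GPTQModel.load("THUDM/GLM-4-32B-0414", quant_config)
model.quantize(calibration_dataset, batch_size=2)  # raise batch_size to match available VRAM
model.save("GLM-4-32B-0414-GPTQ-4bits")
```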

## EoRA Method Dataset

```python
from datasets import load_dataset


def question_answering_format(question, answer):
    return f"Question: {question}\nAnswer: {answer}"

def multiple_choices_question_answering_format(question, choices, answer):
    return f"{question.strip()}\nA. {choices[0]}\nB. {choices[1]}\nC. {choices[2]}\nD. {choices[3]}\nAnswer: {answer}"

## An example of using ARC to construct the EoRA calibration set

def construct_c4():
    # Local copy of C4; the public dataset id is "allenai/c4" (see construct_ARC below).
    calibration_dataset = load_dataset(
        "/mnt/ceph/develop/jiawei/code_dataset/c4",
        data_files="en.noblocklist/c4-train.00001-of-01024.json.gz",
        split="train", download_mode="force_redownload"
    ).select(range(1024))["text"]
    return calibration_dataset

def construct_ARC():
    nsamples = 1024
    arc_easy_calibration_dataset = load_dataset('ai2_arc', 'ARC-Easy', split='train').select(range(nsamples))
    arc_challenge_calibration_dataset = load_dataset('ai2_arc', 'ARC-Challenge', split='train').select(range(nsamples))
    dataset = []

    # Both ARC subsets share the same schema; format each example as plain Q/A text.
    for example in list(arc_easy_calibration_dataset) + list(arc_challenge_calibration_dataset):
        answer = example['choices']['text'][example['choices']['label'].index(example['answerKey'])]
        question = example['question']
        dataset.append(question_answering_format(question=question, answer=answer))

    ## We recommend also including some examples from C4 to avoid overfitting to the downstream data.
    c4_dataset = load_dataset(
        "allenai/c4",
        data_files="en/c4-train.00001-of-01024.json.gz",
        split="train"
    ).select(range(nsamples))["text"]

    return dataset + c4_dataset

def multiple_identity_format(instruction, input_q, output):
    return f"{instruction.strip()} {input_q}\n {output}"


def construct_mmlu():
    # Local copy of MMLU; the public dataset id is "cais/mmlu".
    mmlu_calibration_dataset = load_dataset('/mnt/ceph/develop/jiawei/code_dataset/mmlu', 'all', split='validation')
    dataset = []
    for example in mmlu_calibration_dataset:
        question = example['question']
        choices = example['choices']
        answer = ['A', 'B', 'C', 'D'][example['answer']]  # MMLU stores the gold answer as an index 0-3
        dataset.append(multiple_choices_question_answering_format(question, choices, answer))
    # Local JSON with instruction/input/output records (identity / self-cognition data).
    identity_dataset = load_dataset(
        "json",
        data_files="/mnt/ceph/develop/jiawei/GPTQModel/examples/eora/identity.json",
        split="train"
    )

    for example in identity_dataset:
        instruction = example['instruction']
        input_q = example['input']
        output = example['output']
        dataset.append(multiple_identity_format(instruction, input_q, output))

    ## We recommend also including some examples from C4 to avoid overfitting to the downstream data.
    c4_dataset = load_dataset(
        "/mnt/ceph/develop/jiawei/code_dataset/c4",
        data_files="en.noblocklist/c4-train.00001-of-01024.json.gz",
        split="train"
    ).select(range(1024))["text"]

    return dataset + c4_dataset
```
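As a quick sanity check, the constructors can be materialized directly; `construct_ARC` only needs the public `ai2_arc` and `allenai/c4` datasets, while the other two expect the local paths shown above:

```python
calibration_dataset = construct_ARC()
print(len(calibration_dataset))      # 1024 ARC-Easy + 1024 ARC-Challenge + 1024 C4 samples
print(calibration_dataset[0][:200])  # first formatted ARC-Easy example
```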

1. Quantization

   ```bash
   python examples/eora/eora_generation.py THUDM/GLM-4-32B-0414 --bits 4 --quant_save_path glide-the/GLM-4-32B-0414-GPTQ-4bits --eora_dataset mmlu --eora_save_path glide-the/GLM-4-32B-0414-GPTQ-4bits-eora_rank64_c4 --eora_rank 64
   ```

2. Inference

   ```bash
   python examples/eora/eora_load_and_inference.py --quantized_model glide-the/GLM-4-32B-0414-GPTQ-4bits
   ```
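Alternatively, a load-and-generate sketch with gptqmodel itself; a minimal sketch assuming the `GPTQModel.load` API from its README, with an illustrative prompt:

```python
from gptqmodel import GPTQModel

model = GPTQModel.load("glide-the/GLM-4-32B-0414-GPTQ-4bits")
result = model.generate("The capital of France is")[0]  # returns token ids
print(model.tokenizer.decode(result))
```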

## Usage with transformers


```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("glide-the/GLM-4-32B-0414-GPTQ-4bits")
quantized_model = AutoModelForCausalLM.from_pretrained(
    "glide-the/GLM-4-32B-0414-GPTQ-4bits",
    device_map="auto",  # place the quantized weights on the available GPU(s)
)

# Few-shot prompt for (company, founder) pair extraction.
prompt = """Extract (company, founder) pairs from the following text:

Example 1:
Text: "Jack Ma founded Alibaba."
Output: [("Alibaba", "Jack Ma")]

Example 2:
Text: "Bill Gates is a co-founder of Microsoft."
Output: [("Microsoft", "Bill Gates")]

To extract:
Text: "Steve Jobs founded Apple."
Output: """

inputs = tokenizer(prompt, return_tensors="pt").to(quantized_model.device)
outputs = quantized_model.generate(**inputs, max_new_tokens=64)  # the default max length is too short for the answer
print(tokenizer.decode(outputs[0]))
```