Update README.md
README.md CHANGED
@@ -1,28 +1,22 @@
 ---
 frameworks:
 - Pytorch
-license:
+license: other
 tasks:
 - text-generation
 ---
-# Model Card for CodeFuse-DeepSeek-33B
-
-<img src="https://modelscope.cn/api/v1/models/codefuse-ai/CodeFuse-DeepSeek-33B/repo?Revision=master&FilePath=LOGO.jpg&View=true" width="800"/>
-<p>
+# Model Card for CodeFuse-DeepSeek-33B-4bits
+
+
 
 [[中文]](#chinese) [[English]](#english)
 
-#### Clone with HTTP
-```bash
-git clone https://www.modelscope.cn/codefuse-ai/CodeFuse-DeepSeek-33B-4bits.git
-```
-
 <a id="english"></a>
 
 ## Model Description
 
 CodeFuse-DeepSeek-33B-4bits is the 4-bit quantized version of [CodeFuse-DeepSeek-33B](https://modelscope.cn/models/codefuse-ai/CodeFuse-DeepSeek-33B/summary), a 33B code LLM finetuned with QLoRA on multiple code-related tasks on top of the base model DeepSeek-Coder-33B.
-
+
+After undergoing 4-bit quantization, the CodeFuse-DeepSeek-33B-4bits model can be loaded on either a single A10 (24GB VRAM) or an RTX 4090 (24GB VRAM). Moreover, the quantized model still achieves an impressive accuracy of 78.05% on the HumanEval pass@1 metric.
 
 <br>
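The updated card drops the old "Clone with HTTP" snippet. For readers who still want to fetch the quantized weights locally, here is a minimal sketch using ModelScope's `snapshot_download`; the model id and `v1.0.0` revision are taken from the example code further down in this diff, while the call itself is the standard ModelScope download API:

```python
# Minimal download sketch; model id and revision come from the README's own snippet.
from modelscope import snapshot_download

# Returns the local directory the quantized checkpoint was cached to.
model_dir = snapshot_download('codefuse-ai/CodeFuse-DeepSeek-33B-4bits', revision='v1.0.0')
print(model_dir)
```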
@@ -30,7 +24,7 @@ After undergoing 4-bit quantization, the CodeFuse-DeepSeek-33B-4bits model can be loaded on either a single A10 (24GB VRAM) or an RTX 4090 (24GB VRAM)
 
 🔥🔥🔥 2024-01-12 CodeFuse-DeepSeek-33B-4bits has been released. Despite the quantization process, the model still achieves a remarkable 78.05% accuracy (greedy decoding) on the HumanEval pass@1 metric.
 
-🔥🔥🔥 2024-01-12 CodeFuse-DeepSeek-33B has been released,
+🔥🔥🔥 2024-01-12 CodeFuse-DeepSeek-33B has been released, achieving a pass@1 (greedy decoding) score of 78.65% on HumanEval.
 
 🔥🔥 2023-11-10 CodeFuse-CodeGeeX2-6B has been released, achieving a pass@1 (greedy decoding) score of 45.12% on HumanEval, a 9.22 percentage-point increase over CodeGeeX2's 35.9%.
 
@@ -42,7 +36,7 @@ After undergoing 4-bit quantization, the CodeFuse-DeepSeek-33B-4bits model can be loaded on either a single A10 (24GB VRAM) or an RTX 4090 (24GB VRAM)
 
 🔥🔥🔥 2023-09-26 We are pleased to announce the release of the [4-bit quantized version](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B-4bits/summary) of [CodeFuse-CodeLlama-34B](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B/summary). Despite the quantization process, the model still achieves a remarkable 73.8% accuracy (greedy decoding) on the HumanEval pass@1 metric.
 
-🔥🔥🔥 2023-09-11 [CodeFuse-CodeLlama34B](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B/summary) has
+🔥🔥🔥 2023-09-11 [CodeFuse-CodeLlama34B](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B/summary) has achieved a 74.4% pass@1 (greedy decoding) on HumanEval, the SOTA result for open-sourced LLMs at present.
 
 <br>
 
@@ -140,7 +134,7 @@ In this format, the system section is optional and the conversation can be either single-turn or multi-turn.
 import os
 import torch
 import time
-from
+from transformers import AutoTokenizer
 from auto_gptq import AutoGPTQForCausalLM
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -149,7 +143,7 @@ def load_model_tokenizer(model_path):
     """
     Load model and tokenizer based on the given model name or local path of the downloaded model.
     """
-    tokenizer = AutoTokenizer.from_pretrained(
+    tokenizer = AutoTokenizer.from_pretrained("codefuse-ai/CodeFuse-DeepSeek-33B-4bits",
         trust_remote_code=True,
         use_fast=False,
         legacy=False)
@@ -157,7 +151,7 @@ def load_model_tokenizer(model_path):
     tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids("<|end▁of▁sentence|>")
     tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("<|end▁of▁sentence|>")
 
-    model = AutoGPTQForCausalLM.from_quantized(
+    model = AutoGPTQForCausalLM.from_quantized("codefuse-ai/CodeFuse-DeepSeek-33B-4bits",
         inject_fused_attention=False,
         inject_fused_mlp=False,
         use_safetensors=False,
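Putting the two hunks above together, the new load path looks roughly like the sketch below. Note that the new revision hardcodes the model id inside the function; this sketch keeps the `model_path` parameter instead (an editorial choice), and the `device` placement is an assumption since the diff elides it:

```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

def load_model_tokenizer(model_path):
    """Load the GPTQ-quantized model and its tokenizer from an id or local path."""
    # Slow (SentencePiece) tokenizer; legacy=False opts into the fixed behavior.
    tokenizer = AutoTokenizer.from_pretrained(model_path,
                                              trust_remote_code=True,
                                              use_fast=False,
                                              legacy=False)
    # DeepSeek-Coder ends generations with <|end▁of▁sentence|>; reuse it for padding.
    tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids("<|end▁of▁sentence|>")
    tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("<|end▁of▁sentence|>")

    # Fused attention/MLP kernels stay off, matching the README; the checkpoint ships
    # as a .bin file, hence use_safetensors=False. device="cuda:0" is an assumption.
    model = AutoGPTQForCausalLM.from_quantized(model_path,
                                               inject_fused_attention=False,
                                               inject_fused_mlp=False,
                                               use_safetensors=False,
                                               device="cuda:0")
    return model, tokenizer
```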
@@ -170,7 +164,7 @@ def load_model_tokenizer(model_path):
 
 def inference(model, tokenizer, prompt):
     """
-    Uset the given model and tokenizer to generate an answer for the
+    Use the given model and tokenizer to generate an answer for the specified prompt.
     """
     st = time.time()
     prompt = prompt if prompt.endswith('\n') else f'{prompt}\n'
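The body of `inference` is elided from the diff apart from the timing and prompt-normalization lines. A hedged sketch of what a greedy-decoding completion step could look like with this model; the generation parameters here are assumptions, not taken from the README:

```python
import time

def inference(model, tokenizer, prompt):
    """
    Use the given model and tokenizer to generate an answer for the specified prompt.
    """
    st = time.time()
    prompt = prompt if prompt.endswith('\n') else f'{prompt}\n'
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs,
                             max_new_tokens=512,   # assumed budget
                             do_sample=False,      # greedy decoding, as in the HumanEval claims
                             eos_token_id=tokenizer.eos_token_id,
                             pad_token_id=tokenizer.pad_token_id)
    # Strip the echoed prompt, keep only the newly generated tokens.
    answer = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:],
                              skip_special_tokens=True)
    print(f"generated in {time.time() - st:.1f}s")
    return answer
```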
@@ -198,8 +192,6 @@ def inference(model, tokenizer, prompt):
 
 
 if __name__ == "__main__":
-    model_dir = snapshot_download('codefuse-ai/CodeFuse-DeepSeek-33B-4bits', revision='v1.0.0')
-
     prompt = 'Please write a QuickSort program in Python'
 
     model, tokenizer = load_model_tokenizer(model_dir)
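Note that this hunk removes the only assignment to `model_dir`, yet the context line below it still passes `model_dir` to `load_model_tokenizer`; as committed, the `__main__` block would raise a `NameError`. A minimal hedged repair, assuming the ModelScope id (or any local checkpoint path) is acceptable now that the loaders hardcode the repository:

```python
if __name__ == "__main__":
    # Hypothetical fix: keep model_dir defined after dropping snapshot_download.
    model_dir = "codefuse-ai/CodeFuse-DeepSeek-33B-4bits"

    prompt = 'Please write a QuickSort program in Python'

    model, tokenizer = load_model_tokenizer(model_dir)
    print(inference(model, tokenizer, prompt))  # assumed call; elided from the diff
```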
@@ -332,16 +324,16 @@ User prompt...
 import os
 import torch
 import time
-from
+from transformers import AutoTokenizer
 from auto_gptq import AutoGPTQForCausalLM
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 def load_model_tokenizer(model_path):
     """
-    Load model and tokenizer based on the given model name or local path of downloaded model.
+    Load model and tokenizer based on the given model name or local path of the downloaded model.
     """
-    tokenizer = AutoTokenizer.from_pretrained(
+    tokenizer = AutoTokenizer.from_pretrained("codefuse-ai/CodeFuse-DeepSeek-33B-4bits",
         trust_remote_code=True,
         use_fast=False,
         legacy=False)
@@ -349,7 +341,7 @@ def load_model_tokenizer(model_path):
     tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids("<|end▁of▁sentence|>")
     tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("<|end▁of▁sentence|>")
 
-    model = AutoGPTQForCausalLM.from_quantized(
+    model = AutoGPTQForCausalLM.from_quantized("codefuse-ai/CodeFuse-DeepSeek-33B-4bits",
         inject_fused_attention=False,
         inject_fused_mlp=False,
         use_safetensors=False,
@@ -390,7 +382,6 @@ def inference(model, tokenizer, prompt):
 
 
 if __name__ == "__main__":
-    model_dir = snapshot_download('codefuse-ai/CodeFuse-DeepSeek-33B-4bits', revision='v1.0.0')
 
     prompt = 'Please write a QuickSort program in Python'
 