mlabonne committed
Commit 48e4a71 · verified · 1 Parent(s): 3f747bf

Upload tokenizer

Files changed (2)
  1. tokenizer.json +8 -1
  2. tokenizer_config.json +1 -0
tokenizer.json CHANGED
@@ -1,7 +1,14 @@
 {
   "version": "1.0",
   "truncation": null,
-  "padding": null,
+  "padding": {
+    "strategy": "BatchLongest",
+    "direction": "Left",
+    "pad_to_multiple_of": null,
+    "pad_id": 2,
+    "pad_type_id": 0,
+    "pad_token": "<|im_end|>"
+  },
   "added_tokens": [
     {
       "id": 0,
tokenizer_config.json CHANGED
@@ -149,6 +149,7 @@
   "extra_special_tokens": {},
   "model_max_length": 2048,
   "pad_token": "<|im_end|>",
+  "padding_side": "left",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>",
   "vocab_size": 49152