chentianqi committed on
Commit
72efa40
·
verified ·
1 Parent(s): b10487e

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|AUDIO|>": 151646,
5
+ "<|IMAGE|>": 151655,
6
+ "<|VIDEO|>": 151656,
7
+ "<|audio_bos|>": 151647,
8
+ "<|audio_eos|>": 151648,
9
+ "<|box_end|>": 151649,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|quad_end|>": 151651,
19
+ "<|quad_start|>": 151650,
20
+ "<|repo_name|>": 151663,
21
+ "<|vision_bos|>": 151652,
22
+ "<|vision_eos|>": 151653,
23
+ "<|vision_pad|>": 151654
24
+ }
config.json ADDED
@@ -0,0 +1,593 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2_5OmniModel"
4
+ ],
5
+ "enable_audio_output": true,
6
+ "enable_talker": true,
7
+ "model_type": "qwen2_5_omni",
8
+ "quantization_config": {
9
+ "bits": 4,
10
+ "checkpoint_format": "gptq",
11
+ "desc_act": true,
12
+ "group_size": 128,
13
+ "lm_head": false,
14
+ "meta": {
15
+ "damp_auto_increment": 0.0025,
16
+ "damp_percent": 0.01,
17
+ "mse": 0.0,
18
+ "quantizer": [
19
+ "gptqmodel:2.1.1-dev"
20
+ ],
21
+ "static_groups": false,
22
+ "true_sequential": true,
23
+ "uri": "https://github.com/modelcloud/gptqmodel"
24
+ },
25
+ "pack_dtype": "int32",
26
+ "quant_method": "gptq",
27
+ "sym": true
28
+ },
29
+ "talker_config": {
30
+ "_attn_implementation_autoset": true,
31
+ "_name_or_path": "Qwen2.5-Omni-7B/talker",
32
+ "architectures": [
33
+ "Qwen2OmniTalkerForConditionalGeneration"
34
+ ],
35
+ "attention_dropout": 0.0,
36
+ "audio_end_token_id": 151648,
37
+ "audio_start_token_id": 151647,
38
+ "audio_token_index": 151646,
39
+ "embedding_size": 3584,
40
+ "head_dim": 128,
41
+ "hidden_act": "silu",
42
+ "hidden_size": 896,
43
+ "image_token_index": 151655,
44
+ "init_std": 0.02,
45
+ "initializer_range": 0.02,
46
+ "intermediate_size": 18944,
47
+ "max_position_embeddings": 32768,
48
+ "max_window_layers": 28,
49
+ "model_type": "qwen2_5_omni_talker",
50
+ "num_attention_heads": 12,
51
+ "num_hidden_layers": 24,
52
+ "num_key_value_heads": 4,
53
+ "position_id_per_seconds": 25,
54
+ "rms_norm_eps": 1e-06,
55
+ "rope_scaling": {
56
+ "mrope_section": [
57
+ 16,
58
+ 24,
59
+ 24
60
+ ],
61
+ "rope_type": "default",
62
+ "type": "default"
63
+ },
64
+ "rope_theta": 1000000.0,
65
+ "seconds_per_chunk": 2,
66
+ "sliding_window": 32768,
67
+ "spatial_merge_size": 2,
68
+ "torch_dtype": "float16",
69
+ "tts_codec_end_token_id": 8294,
70
+ "tts_codec_mask_token_id": 8296,
71
+ "tts_codec_pad_token_id": 8292,
72
+ "tts_codec_start_token_id": 8293,
73
+ "tts_text_end_token_id": 151861,
74
+ "tts_text_pad_token_id": 151859,
75
+ "tts_text_start_token_id": 151860,
76
+ "use_cache": true,
77
+ "use_mrope": false,
78
+ "use_sliding_window": false,
79
+ "video_token_index": 151656,
80
+ "vision_end_token_id": 151653,
81
+ "vision_start_token_id": 151652,
82
+ "vocab_size": 8448
83
+ },
84
+ "thinker_config": {
85
+ "_attn_implementation_autoset": true,
86
+ "_name_or_path": "Qwen2.5-Omni-7B/thinker",
87
+ "architectures": [
88
+ "Qwen2OmniNaViTThinkerForConditionalGeneration"
89
+ ],
90
+ "audio_config": {
91
+ "_attn_implementation_autoset": true,
92
+ "_name_or_path": "",
93
+ "activation_dropout": 0.0,
94
+ "activation_function": "gelu",
95
+ "add_cross_attention": false,
96
+ "architectures": null,
97
+ "attention_dropout": 0.0,
98
+ "bad_words_ids": null,
99
+ "begin_suppress_tokens": null,
100
+ "bos_token_id": null,
101
+ "chunk_size_feed_forward": 0,
102
+ "cross_attention_hidden_size": null,
103
+ "d_model": 1280,
104
+ "decoder_start_token_id": null,
105
+ "diversity_penalty": 0.0,
106
+ "do_sample": false,
107
+ "dropout": 0.0,
108
+ "early_stopping": false,
109
+ "encoder_attention_heads": 20,
110
+ "encoder_ffn_dim": 5120,
111
+ "encoder_layerdrop": 0.0,
112
+ "encoder_layers": 32,
113
+ "encoder_no_repeat_ngram_size": 0,
114
+ "eos_token_id": null,
115
+ "exponential_decay_length_penalty": null,
116
+ "finetuning_task": null,
117
+ "forced_bos_token_id": null,
118
+ "forced_eos_token_id": null,
119
+ "id2label": {
120
+ "0": "LABEL_0",
121
+ "1": "LABEL_1"
122
+ },
123
+ "init_std": 0.02,
124
+ "is_decoder": false,
125
+ "is_encoder_decoder": false,
126
+ "label2id": {
127
+ "LABEL_0": 0,
128
+ "LABEL_1": 1
129
+ },
130
+ "length_penalty": 1.0,
131
+ "max_length": 20,
132
+ "max_source_positions": 1500,
133
+ "min_length": 0,
134
+ "model_type": "qwen2_5_omni_audio_encoder",
135
+ "n_window": 100,
136
+ "no_repeat_ngram_size": 0,
137
+ "num_beam_groups": 1,
138
+ "num_beams": 1,
139
+ "num_hidden_layers": 32,
140
+ "num_mel_bins": 128,
141
+ "num_return_sequences": 1,
142
+ "output_attentions": false,
143
+ "output_dim": 3584,
144
+ "output_hidden_states": false,
145
+ "output_scores": false,
146
+ "pad_token_id": null,
147
+ "prefix": null,
148
+ "problem_type": null,
149
+ "pruned_heads": {},
150
+ "remove_invalid_values": false,
151
+ "repetition_penalty": 1.0,
152
+ "return_dict": true,
153
+ "return_dict_in_generate": false,
154
+ "scale_embedding": false,
155
+ "sep_token_id": null,
156
+ "suppress_tokens": null,
157
+ "task_specific_params": null,
158
+ "temperature": 1.0,
159
+ "tf_legacy_loss": false,
160
+ "tie_encoder_decoder": false,
161
+ "tie_word_embeddings": true,
162
+ "tokenizer_class": null,
163
+ "top_k": 50,
164
+ "top_p": 1.0,
165
+ "torch_dtype": null,
166
+ "torchscript": false,
167
+ "typical_p": 1.0,
168
+ "use_bfloat16": false
169
+ },
170
+ "audio_end_token_id": 151648,
171
+ "audio_start_token_id": 151647,
172
+ "audio_token_index": 151646,
173
+ "bos_token_id": 151644,
174
+ "eos_token_id": 151645,
175
+ "ignore_index": -100,
176
+ "image_token_index": 151655,
177
+ "init_std": 0.02,
178
+ "model_type": "qwen2_5_omni_thinker",
179
+ "pad_token_id": 151643,
180
+ "position_id_per_seconds": 25,
181
+ "seconds_per_chunk": 2,
182
+ "text_config": {
183
+ "_attn_implementation_autoset": false,
184
+ "_name_or_path": "",
185
+ "add_cross_attention": false,
186
+ "architectures": null,
187
+ "attention_dropout": 0.0,
188
+ "bad_words_ids": null,
189
+ "begin_suppress_tokens": null,
190
+ "bos_token_id": null,
191
+ "chunk_size_feed_forward": 0,
192
+ "cross_attention_hidden_size": null,
193
+ "decoder_start_token_id": null,
194
+ "diversity_penalty": 0.0,
195
+ "do_sample": false,
196
+ "early_stopping": false,
197
+ "encoder_no_repeat_ngram_size": 0,
198
+ "eos_token_id": null,
199
+ "exponential_decay_length_penalty": null,
200
+ "finetuning_task": null,
201
+ "forced_bos_token_id": null,
202
+ "forced_eos_token_id": null,
203
+ "hidden_act": "silu",
204
+ "hidden_size": 3584,
205
+ "id2label": {
206
+ "0": "LABEL_0",
207
+ "1": "LABEL_1"
208
+ },
209
+ "init_std": 0.02,
210
+ "intermediate_size": 18944,
211
+ "is_decoder": false,
212
+ "is_encoder_decoder": false,
213
+ "label2id": {
214
+ "LABEL_0": 0,
215
+ "LABEL_1": 1
216
+ },
217
+ "length_penalty": 1.0,
218
+ "max_length": 20,
219
+ "max_position_embeddings": 32768,
220
+ "max_window_layers": 28,
221
+ "min_length": 0,
222
+ "model_type": "qwen2_5_omni_text",
223
+ "no_repeat_ngram_size": 0,
224
+ "num_attention_heads": 28,
225
+ "num_beam_groups": 1,
226
+ "num_beams": 1,
227
+ "num_hidden_layers": 28,
228
+ "num_key_value_heads": 4,
229
+ "num_return_sequences": 1,
230
+ "output_attentions": false,
231
+ "output_hidden_states": false,
232
+ "output_scores": false,
233
+ "pad_token_id": null,
234
+ "prefix": null,
235
+ "problem_type": null,
236
+ "pruned_heads": {},
237
+ "remove_invalid_values": false,
238
+ "repetition_penalty": 1.0,
239
+ "return_dict": true,
240
+ "return_dict_in_generate": false,
241
+ "rms_norm_eps": 1e-06,
242
+ "rope_scaling": {
243
+ "mrope_section": [
244
+ 16,
245
+ 24,
246
+ 24
247
+ ],
248
+ "rope_type": "default",
249
+ "type": "default"
250
+ },
251
+ "rope_theta": 1000000.0,
252
+ "sep_token_id": null,
253
+ "sliding_window": 32768,
254
+ "suppress_tokens": null,
255
+ "task_specific_params": null,
256
+ "temperature": 1.0,
257
+ "tf_legacy_loss": false,
258
+ "tie_encoder_decoder": false,
259
+ "tie_word_embeddings": true,
260
+ "tokenizer_class": null,
261
+ "top_k": 50,
262
+ "top_p": 1.0,
263
+ "torch_dtype": null,
264
+ "torchscript": false,
265
+ "typical_p": 1.0,
266
+ "use_bfloat16": false,
267
+ "use_cache": true,
268
+ "use_sliding_window": false,
269
+ "vocab_size": 152064
270
+ },
271
+ "torch_dtype": "float16",
272
+ "use_mrope": false,
273
+ "user_token_id": 872,
274
+ "video_token_index": 151656,
275
+ "vision_config": {
276
+ "_attn_implementation_autoset": true,
277
+ "_name_or_path": "",
278
+ "add_cross_attention": false,
279
+ "architectures": null,
280
+ "bad_words_ids": null,
281
+ "begin_suppress_tokens": null,
282
+ "bos_token_id": null,
283
+ "chunk_size_feed_forward": 0,
284
+ "cross_attention_hidden_size": null,
285
+ "decoder_start_token_id": null,
286
+ "depth": 32,
287
+ "diversity_penalty": 0.0,
288
+ "do_sample": false,
289
+ "early_stopping": false,
290
+ "embed_dim": 1280,
291
+ "encoder_no_repeat_ngram_size": 0,
292
+ "eos_token_id": null,
293
+ "exponential_decay_length_penalty": null,
294
+ "finetuning_task": null,
295
+ "forced_bos_token_id": null,
296
+ "forced_eos_token_id": null,
297
+ "fullatt_block_indexes": [
298
+ 7,
299
+ 15,
300
+ 23,
301
+ 31
302
+ ],
303
+ "hidden_act": "silu",
304
+ "hidden_size": 1280,
305
+ "id2label": {
306
+ "0": "LABEL_0",
307
+ "1": "LABEL_1"
308
+ },
309
+ "in_channels": 3,
310
+ "in_chans": 3,
311
+ "init_std": 0.02,
312
+ "intermediate_size": 3420,
313
+ "is_decoder": false,
314
+ "is_encoder_decoder": false,
315
+ "label2id": {
316
+ "LABEL_0": 0,
317
+ "LABEL_1": 1
318
+ },
319
+ "length_penalty": 1.0,
320
+ "max_length": 20,
321
+ "min_length": 0,
322
+ "model_type": "qwen2_5_omni_vision_encoder",
323
+ "no_repeat_ngram_size": 0,
324
+ "num_beam_groups": 1,
325
+ "num_beams": 1,
326
+ "num_heads": 16,
327
+ "num_return_sequences": 1,
328
+ "out_hidden_size": 3584,
329
+ "output_attentions": false,
330
+ "output_hidden_states": false,
331
+ "output_scores": false,
332
+ "pad_token_id": null,
333
+ "patch_size": 14,
334
+ "prefix": null,
335
+ "problem_type": null,
336
+ "pruned_heads": {},
337
+ "remove_invalid_values": false,
338
+ "repetition_penalty": 1.0,
339
+ "return_dict": true,
340
+ "return_dict_in_generate": false,
341
+ "sep_token_id": null,
342
+ "spatial_merge_size": 2,
343
+ "spatial_patch_size": 14,
344
+ "suppress_tokens": null,
345
+ "task_specific_params": null,
346
+ "temperature": 1.0,
347
+ "temporal_patch_size": 2,
348
+ "tf_legacy_loss": false,
349
+ "tie_encoder_decoder": false,
350
+ "tie_word_embeddings": true,
351
+ "tokenizer_class": null,
352
+ "tokens_per_second": 25,
353
+ "top_k": 50,
354
+ "top_p": 1.0,
355
+ "torch_dtype": null,
356
+ "torchscript": false,
357
+ "typical_p": 1.0,
358
+ "use_bfloat16": false,
359
+ "window_size": 112
360
+ },
361
+ "vision_end_token_id": 151653,
362
+ "vision_start_token_id": 151652,
363
+ "vision_token_id": 151654
364
+ },
365
+ "token2wav_config": {
366
+ "_attn_implementation_autoset": true,
367
+ "bigvgan_config": {
368
+ "_attn_implementation_autoset": true,
369
+ "_name_or_path": "",
370
+ "add_cross_attention": false,
371
+ "architectures": null,
372
+ "bad_words_ids": null,
373
+ "begin_suppress_tokens": null,
374
+ "bos_token_id": null,
375
+ "chunk_size_feed_forward": 0,
376
+ "cross_attention_hidden_size": null,
377
+ "decoder_start_token_id": null,
378
+ "diversity_penalty": 0.0,
379
+ "do_sample": false,
380
+ "early_stopping": false,
381
+ "encoder_no_repeat_ngram_size": 0,
382
+ "eos_token_id": null,
383
+ "exponential_decay_length_penalty": null,
384
+ "finetuning_task": null,
385
+ "forced_bos_token_id": null,
386
+ "forced_eos_token_id": null,
387
+ "id2label": {
388
+ "0": "LABEL_0",
389
+ "1": "LABEL_1"
390
+ },
391
+ "is_decoder": false,
392
+ "is_encoder_decoder": false,
393
+ "label2id": {
394
+ "LABEL_0": 0,
395
+ "LABEL_1": 1
396
+ },
397
+ "length_penalty": 1.0,
398
+ "max_length": 20,
399
+ "mel_dim": 80,
400
+ "min_length": 0,
401
+ "model_type": "qwen2_5_omni_bigvgan",
402
+ "no_repeat_ngram_size": 0,
403
+ "num_beam_groups": 1,
404
+ "num_beams": 1,
405
+ "num_return_sequences": 1,
406
+ "output_attentions": false,
407
+ "output_hidden_states": false,
408
+ "output_scores": false,
409
+ "pad_token_id": null,
410
+ "prefix": null,
411
+ "problem_type": null,
412
+ "pruned_heads": {},
413
+ "remove_invalid_values": false,
414
+ "repetition_penalty": 1.0,
415
+ "resblock_dilation_sizes": [
416
+ [
417
+ 1,
418
+ 3,
419
+ 5
420
+ ],
421
+ [
422
+ 1,
423
+ 3,
424
+ 5
425
+ ],
426
+ [
427
+ 1,
428
+ 3,
429
+ 5
430
+ ]
431
+ ],
432
+ "resblock_kernel_sizes": [
433
+ 3,
434
+ 7,
435
+ 11
436
+ ],
437
+ "return_dict": true,
438
+ "return_dict_in_generate": false,
439
+ "sep_token_id": null,
440
+ "suppress_tokens": null,
441
+ "task_specific_params": null,
442
+ "temperature": 1.0,
443
+ "tf_legacy_loss": false,
444
+ "tie_encoder_decoder": false,
445
+ "tie_word_embeddings": true,
446
+ "tokenizer_class": null,
447
+ "top_k": 50,
448
+ "top_p": 1.0,
449
+ "torch_dtype": null,
450
+ "torchscript": false,
451
+ "typical_p": 1.0,
452
+ "upsample_initial_channel": 1536,
453
+ "upsample_kernel_sizes": [
454
+ 11,
455
+ 7,
456
+ 4,
457
+ 4,
458
+ 4,
459
+ 4
460
+ ],
461
+ "upsample_rates": [
462
+ 5,
463
+ 3,
464
+ 2,
465
+ 2,
466
+ 2,
467
+ 2
468
+ ],
469
+ "use_bfloat16": false,
470
+ "use_bias_at_final": false
471
+ },
472
+ "dit_config": {
473
+ "_attn_implementation_autoset": true,
474
+ "_name_or_path": "",
475
+ "add_cross_attention": false,
476
+ "architectures": null,
477
+ "bad_words_ids": null,
478
+ "begin_suppress_tokens": null,
479
+ "block_size": 24,
480
+ "bos_token_id": null,
481
+ "chunk_size_feed_forward": 0,
482
+ "cross_attention_hidden_size": null,
483
+ "decoder_start_token_id": null,
484
+ "depth": 22,
485
+ "dim": 1024,
486
+ "diversity_penalty": 0.0,
487
+ "do_sample": false,
488
+ "dropout": 0.1,
489
+ "early_stopping": false,
490
+ "emb_dim": 512,
491
+ "enc_attention_channels": 64,
492
+ "enc_channels": [
493
+ 256,
494
+ 256,
495
+ 256,
496
+ 256,
497
+ 768
498
+ ],
499
+ "enc_dilations": [
500
+ 1,
501
+ 2,
502
+ 3,
503
+ 4,
504
+ 1
505
+ ],
506
+ "enc_dim": 128,
507
+ "enc_emb_dim": 192,
508
+ "enc_global_context": true,
509
+ "enc_kernel_sizes": [
510
+ 5,
511
+ 3,
512
+ 3,
513
+ 3,
514
+ 1
515
+ ],
516
+ "enc_lin_neurons": 192,
517
+ "enc_res2net_scale": 2,
518
+ "enc_se_channels": 64,
519
+ "encoder_no_repeat_ngram_size": 0,
520
+ "eos_token_id": null,
521
+ "exponential_decay_length_penalty": null,
522
+ "ff_mult": 2,
523
+ "finetuning_task": null,
524
+ "forced_bos_token_id": null,
525
+ "forced_eos_token_id": null,
526
+ "head_dim": 64,
527
+ "heads": 16,
528
+ "hidden_size": 1024,
529
+ "id2label": {
530
+ "0": "LABEL_0",
531
+ "1": "LABEL_1"
532
+ },
533
+ "is_decoder": false,
534
+ "is_encoder_decoder": false,
535
+ "label2id": {
536
+ "LABEL_0": 0,
537
+ "LABEL_1": 1
538
+ },
539
+ "length_penalty": 1.0,
540
+ "look_ahead_layers": [
541
+ 10
542
+ ],
543
+ "look_backward_layers": [
544
+ 0,
545
+ 20
546
+ ],
547
+ "max_length": 20,
548
+ "max_position_embeddings": 32768,
549
+ "mel_dim": 80,
550
+ "min_length": 0,
551
+ "model_type": "qwen2_5_omni_dit",
552
+ "no_repeat_ngram_size": 0,
553
+ "num_attention_heads": 16,
554
+ "num_beam_groups": 1,
555
+ "num_beams": 1,
556
+ "num_embeds": 8193,
557
+ "num_hidden_layers": 22,
558
+ "num_return_sequences": 1,
559
+ "output_attentions": false,
560
+ "output_hidden_states": false,
561
+ "output_scores": false,
562
+ "pad_token_id": null,
563
+ "prefix": null,
564
+ "problem_type": null,
565
+ "pruned_heads": {},
566
+ "remove_invalid_values": false,
567
+ "repeats": 2,
568
+ "repetition_penalty": 1.0,
569
+ "return_dict": true,
570
+ "return_dict_in_generate": false,
571
+ "rope_theta": 10000.0,
572
+ "sep_token_id": null,
573
+ "suppress_tokens": null,
574
+ "task_specific_params": null,
575
+ "temperature": 1.0,
576
+ "tf_legacy_loss": false,
577
+ "tie_encoder_decoder": false,
578
+ "tie_word_embeddings": true,
579
+ "tokenizer_class": null,
580
+ "top_k": 50,
581
+ "top_p": 1.0,
582
+ "torch_dtype": "float32",
583
+ "torchscript": false,
584
+ "typical_p": 1.0,
585
+ "use_bfloat16": false
586
+ },
587
+ "model_type": "qwen2_5_omni_token2wav",
588
+ "torch_dtype": "float16"
589
+ },
590
+ "torch_dtype": "float16",
591
+ "transformers_version": "4.50.0.dev0",
592
+ "use_cache": false
593
+ }
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.50.0.dev0"
4
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f04a51ab40fc364c02855ec5a76bb680be239d04a6314b8f625ae1d0f8306f46
3
+ size 3980359152
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4690ab05b3edd91cc497017a226bf89383862b8042a10d520a647d0c325456eb
3
+ size 3137548688
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddca06aa8c99fccc2e54df60360e9328617d448ccf1901bbeb065eac78906d37
3
+ size 3993379832
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33733bedd858bb3157d479eaba1388693f29b83a26e650810581f33eb1d1c79
3
+ size 1595686480
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
quant_log.csv ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ layer,module,loss,damp,time
2
+ 0,self_attn.k_proj,0.84503669,0.01000,0.847
3
+ 0,self_attn.v_proj,0.09790013,0.01000,0.621
4
+ 0,self_attn.q_proj,3.45275998,0.01000,0.623
5
+ 0,self_attn.o_proj,0.02797472,0.01000,0.716
6
+ 0,mlp.up_proj,6.59510899,0.01000,0.753
7
+ 0,mlp.gate_proj,14.64342403,0.01000,0.634
8
+ 0,mlp.down_proj,0.54082263,0.01000,4.907
9
+ 1,self_attn.k_proj,0.66450667,0.01000,0.755
10
+ 1,self_attn.v_proj,0.13586605,0.01000,0.624
11
+ 1,self_attn.q_proj,2.53379679,0.01000,0.622
12
+ 1,self_attn.o_proj,0.01710801,0.01000,0.720
13
+ 1,mlp.up_proj,56.40869904,0.01000,0.752
14
+ 1,mlp.gate_proj,94.36625671,0.01000,0.638
15
+ 1,mlp.down_proj,0.48488298,0.01000,4.885
16
+ 2,self_attn.k_proj,1.85023427,0.01000,0.758
17
+ 2,self_attn.v_proj,0.29796562,0.01000,0.620
18
+ 2,self_attn.q_proj,6.63527250,0.01000,0.624
19
+ 2,self_attn.o_proj,0.05357596,0.01000,0.721
20
+ 2,mlp.up_proj,72.35107422,0.01000,0.754
21
+ 2,mlp.gate_proj,120.69715118,0.01000,0.637
22
+ 2,mlp.down_proj,0.71529531,0.01000,4.897
23
+ 3,self_attn.k_proj,1.83869004,0.01000,0.816
24
+ 3,self_attn.v_proj,0.42401373,0.01000,0.667
25
+ 3,self_attn.q_proj,6.90928888,0.01000,0.638
26
+ 3,self_attn.o_proj,0.22543210,0.01000,0.795
27
+ 3,mlp.up_proj,199.29341125,0.01000,0.756
28
+ 3,mlp.gate_proj,270.88391113,0.01000,0.641
29
+ 3,mlp.down_proj,1.02437985,0.01000,4.892
30
+ 4,self_attn.k_proj,3.05090141,0.01000,0.757
31
+ 4,self_attn.v_proj,0.90732384,0.01000,0.623
32
+ 4,self_attn.q_proj,12.91889858,0.01000,0.622
33
+ 4,self_attn.o_proj,0.16272199,0.01000,0.724
34
+ 4,mlp.up_proj,177.98535156,0.01000,0.751
35
+ 4,mlp.gate_proj,268.20730591,0.01000,0.635
36
+ 4,mlp.down_proj,1.51549566,0.01000,4.892
37
+ 5,self_attn.k_proj,3.06540680,0.01000,0.769
38
+ 5,self_attn.v_proj,1.12325358,0.01000,0.625
39
+ 5,self_attn.q_proj,14.62543392,0.01000,0.622
40
+ 5,self_attn.o_proj,0.21586980,0.01000,0.719
41
+ 5,mlp.up_proj,267.72155762,0.01000,0.769
42
+ 5,mlp.gate_proj,343.41455078,0.01000,0.638
43
+ 5,mlp.down_proj,1.18007314,0.01000,4.889
44
+ 6,self_attn.k_proj,2.07387280,0.01000,0.755
45
+ 6,self_attn.v_proj,0.94506907,0.01000,0.621
46
+ 6,self_attn.q_proj,9.86525154,0.01000,0.625
47
+ 6,self_attn.o_proj,0.26159465,0.01000,0.720
48
+ 6,mlp.up_proj,66.96002197,0.01000,0.760
49
+ 6,mlp.gate_proj,95.20739746,0.01000,0.634
50
+ 6,mlp.down_proj,3.78148937,0.01000,4.891
51
+ 7,self_attn.k_proj,2.32142448,0.01000,0.754
52
+ 7,self_attn.v_proj,1.96631455,0.01000,0.621
53
+ 7,self_attn.q_proj,12.40788651,0.01000,0.621
54
+ 7,self_attn.o_proj,0.92824316,0.01000,0.718
55
+ 7,mlp.up_proj,66.62113190,0.01000,0.756
56
+ 7,mlp.gate_proj,74.05113220,0.01000,0.640
57
+ 7,mlp.down_proj,6.63149881,0.01000,4.882
58
+ 8,self_attn.k_proj,4.55058146,0.01000,0.755
59
+ 8,self_attn.v_proj,1.65348577,0.01000,0.632
60
+ 8,self_attn.q_proj,18.91058159,0.01000,0.649
61
+ 8,self_attn.o_proj,1.66989470,0.01000,0.720
62
+ 8,mlp.up_proj,79.65923309,0.01000,0.748
63
+ 8,mlp.gate_proj,83.44503021,0.01000,0.634
64
+ 8,mlp.down_proj,7.66813326,0.01000,4.916
65
+ 9,self_attn.k_proj,3.84148502,0.01000,0.754
66
+ 9,self_attn.v_proj,2.96535730,0.01000,0.622
67
+ 9,self_attn.q_proj,20.20914268,0.01000,0.626
68
+ 9,self_attn.o_proj,2.59902573,0.01000,0.719
69
+ 9,mlp.up_proj,156.97390747,0.01000,0.753
70
+ 9,mlp.gate_proj,251.82458496,0.01000,0.638
71
+ 9,mlp.down_proj,8.60938835,0.01000,4.885
72
+ 10,self_attn.k_proj,4.07611465,0.01000,0.755
73
+ 10,self_attn.v_proj,2.05425596,0.01000,0.622
74
+ 10,self_attn.q_proj,19.85713577,0.01000,0.623
75
+ 10,self_attn.o_proj,1.43030739,0.01000,0.720
76
+ 10,mlp.up_proj,100.09529114,0.01000,0.749
77
+ 10,mlp.gate_proj,113.80771637,0.01000,0.640
78
+ 10,mlp.down_proj,8.42348957,0.01000,4.895
79
+ 11,self_attn.k_proj,5.21213436,0.01000,0.755
80
+ 11,self_attn.v_proj,1.78169560,0.01000,0.624
81
+ 11,self_attn.q_proj,21.62147522,0.01000,0.625
82
+ 11,self_attn.o_proj,2.29331183,0.01000,0.719
83
+ 11,mlp.up_proj,95.10997009,0.01000,0.754
84
+ 11,mlp.gate_proj,100.02523804,0.01000,0.638
85
+ 11,mlp.down_proj,8.73736763,0.01000,4.897
86
+ 12,self_attn.k_proj,5.94782352,0.01000,0.752
87
+ 12,self_attn.v_proj,2.57430744,0.01000,0.623
88
+ 12,self_attn.q_proj,25.37301826,0.01000,0.624
89
+ 12,self_attn.o_proj,2.74405336,0.01000,0.720
90
+ 12,mlp.up_proj,99.41949463,0.01000,0.760
91
+ 12,mlp.gate_proj,98.57714081,0.01000,0.635
92
+ 12,mlp.down_proj,10.31996155,0.01000,4.876
93
+ 13,self_attn.k_proj,5.70451880,0.01000,0.753
94
+ 13,self_attn.v_proj,3.61313081,0.01000,0.620
95
+ 13,self_attn.q_proj,28.92498398,0.01000,0.623
96
+ 13,self_attn.o_proj,5.05193090,0.01000,0.717
97
+ 13,mlp.up_proj,102.82405853,0.01000,0.753
98
+ 13,mlp.gate_proj,109.35607910,0.01000,0.674
99
+ 13,mlp.down_proj,10.62277031,0.01000,4.872
100
+ 14,self_attn.k_proj,8.16368103,0.01000,0.750
101
+ 14,self_attn.v_proj,3.29415512,0.01000,0.616
102
+ 14,self_attn.q_proj,40.57175064,0.01000,0.618
103
+ 14,self_attn.o_proj,3.70151019,0.01000,0.719
104
+ 14,mlp.up_proj,116.39168549,0.01000,0.752
105
+ 14,mlp.gate_proj,117.40150452,0.01000,0.637
106
+ 14,mlp.down_proj,12.35162067,0.01000,4.897
107
+ 15,self_attn.k_proj,7.40435266,0.01000,0.754
108
+ 15,self_attn.v_proj,2.82182503,0.01000,0.629
109
+ 15,self_attn.q_proj,32.03612518,0.01000,0.622
110
+ 15,self_attn.o_proj,2.86537886,0.01000,0.720
111
+ 15,mlp.up_proj,109.34799194,0.01000,0.816
112
+ 15,mlp.gate_proj,106.16775513,0.01000,0.652
113
+ 15,mlp.down_proj,11.65571213,0.01000,4.899
114
+ 16,self_attn.k_proj,7.20972252,0.01000,0.750
115
+ 16,self_attn.v_proj,3.81571937,0.01000,0.615
116
+ 16,self_attn.q_proj,35.60685730,0.01000,0.617
117
+ 16,self_attn.o_proj,4.76396894,0.01000,0.718
118
+ 16,mlp.up_proj,119.07849121,0.01000,0.753
119
+ 16,mlp.gate_proj,115.41696167,0.01000,0.654
120
+ 16,mlp.down_proj,13.42798615,0.01000,4.888
121
+ 17,self_attn.k_proj,7.86328506,0.01000,0.756
122
+ 17,self_attn.v_proj,4.99391937,0.01000,0.622
123
+ 17,self_attn.q_proj,41.75648499,0.01000,0.624
124
+ 17,self_attn.o_proj,3.45894480,0.01000,0.719
125
+ 17,mlp.up_proj,143.43394470,0.01000,0.754
126
+ 17,mlp.gate_proj,136.14013672,0.01000,0.643
127
+ 17,mlp.down_proj,17.81203842,0.01000,4.882
128
+ 18,self_attn.k_proj,6.22359562,0.01000,0.753
129
+ 18,self_attn.v_proj,6.02423716,0.01000,0.624
130
+ 18,self_attn.q_proj,35.44544601,0.01000,0.624
131
+ 18,self_attn.o_proj,4.30885458,0.01000,0.722
132
+ 18,mlp.up_proj,153.58898926,0.01000,0.760
133
+ 18,mlp.gate_proj,142.47561646,0.01000,0.637
134
+ 18,mlp.down_proj,18.25153732,0.01000,4.894
135
+ 19,self_attn.k_proj,5.90289783,0.01000,0.757
136
+ 19,self_attn.v_proj,6.39343739,0.01000,0.623
137
+ 19,self_attn.q_proj,38.50756073,0.01000,0.624
138
+ 19,self_attn.o_proj,5.47891045,0.01000,0.719
139
+ 19,mlp.up_proj,163.33058167,0.01000,0.758
140
+ 19,mlp.gate_proj,157.55880737,0.01000,0.635
141
+ 19,mlp.down_proj,19.15095139,0.01000,4.872
142
+ 20,self_attn.k_proj,6.11784792,0.01000,0.756
143
+ 20,self_attn.v_proj,7.04739285,0.01000,0.625
144
+ 20,self_attn.q_proj,36.47023392,0.01000,0.623
145
+ 20,self_attn.o_proj,2.35553312,0.01000,0.717
146
+ 20,mlp.up_proj,198.99542236,0.01000,0.750
147
+ 20,mlp.gate_proj,190.01835632,0.01000,0.637
148
+ 20,mlp.down_proj,32.01580811,0.01000,4.894
149
+ 21,self_attn.k_proj,6.29537106,0.01000,0.758
150
+ 21,self_attn.v_proj,10.25707436,0.01000,0.621
151
+ 21,self_attn.q_proj,41.46713257,0.01000,0.621
152
+ 21,self_attn.o_proj,8.36542606,0.01000,0.720
153
+ 21,mlp.up_proj,250.49508667,0.01000,0.751
154
+ 21,mlp.gate_proj,251.44859314,0.01000,0.636
155
+ 21,mlp.down_proj,43.56330109,0.01000,4.881
156
+ 22,self_attn.k_proj,8.49018574,0.01000,0.758
157
+ 22,self_attn.v_proj,17.15532875,0.01000,0.622
158
+ 22,self_attn.q_proj,56.59531021,0.01000,0.622
159
+ 22,self_attn.o_proj,3.68090487,0.01000,0.717
160
+ 22,mlp.up_proj,337.93640137,0.01000,0.756
161
+ 22,mlp.gate_proj,333.52142334,0.01000,0.632
162
+ 22,mlp.down_proj,67.46343994,0.01000,4.917
163
+ 23,self_attn.k_proj,12.21148396,0.01000,0.757
164
+ 23,self_attn.v_proj,25.81269073,0.01000,0.621
165
+ 23,self_attn.q_proj,71.46172333,0.01000,0.623
166
+ 23,self_attn.o_proj,11.98000240,0.01000,0.720
167
+ 23,mlp.up_proj,483.65325928,0.01000,0.752
168
+ 23,mlp.gate_proj,493.06561279,0.01000,0.635
169
+ 23,mlp.down_proj,85.32797241,0.01000,4.916
170
+ 24,self_attn.k_proj,9.50092220,0.01000,0.755
171
+ 24,self_attn.v_proj,26.38395882,0.01000,0.623
172
+ 24,self_attn.q_proj,66.23488617,0.01000,0.624
173
+ 24,self_attn.o_proj,8.46130848,0.01000,0.730
174
+ 24,mlp.up_proj,575.74047852,0.01000,0.748
175
+ 24,mlp.gate_proj,538.54052734,0.01000,0.636
176
+ 24,mlp.down_proj,111.23144531,0.01000,4.899
177
+ 25,self_attn.k_proj,11.69873905,0.01000,0.754
178
+ 25,self_attn.v_proj,50.58491135,0.01000,0.623
179
+ 25,self_attn.q_proj,81.08997345,0.01000,0.622
180
+ 25,self_attn.o_proj,11.35678291,0.01000,0.721
181
+ 25,mlp.up_proj,770.88513184,0.01000,0.761
182
+ 25,mlp.gate_proj,682.92773438,0.01000,0.637
183
+ 25,mlp.down_proj,195.86593628,0.01000,4.877
184
+ 26,self_attn.k_proj,15.98041534,0.01000,0.748
185
+ 26,self_attn.v_proj,117.35370636,0.01000,0.618
186
+ 26,self_attn.q_proj,119.34364319,0.01000,0.628
187
+ 26,self_attn.o_proj,26.08083153,0.01000,0.716
188
+ 26,mlp.up_proj,769.50878906,0.01000,0.747
189
+ 26,mlp.gate_proj,671.80041504,0.01000,0.633
190
+ 26,mlp.down_proj,279.47283936,0.01000,4.886
191
+ 27,self_attn.k_proj,19.93762970,0.01000,0.750
192
+ 27,self_attn.v_proj,191.60113525,0.01000,0.616
193
+ 27,self_attn.q_proj,190.72473145,0.01000,0.619
194
+ 27,self_attn.o_proj,32.69224930,0.01000,0.715
195
+ 27,mlp.up_proj,870.30517578,0.01000,0.751
196
+ 27,mlp.gate_proj,839.04919434,0.01000,0.634
197
+ 27,mlp.down_proj,378.88079834,0.01000,4.875
quantize_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "desc_act": true,
5
+ "sym": true,
6
+ "lm_head": false,
7
+ "quant_method": "gptq",
8
+ "checkpoint_format": "gptq",
9
+ "pack_dtype": "int32",
10
+ "meta": {
11
+ "quantizer": [
12
+ "gptqmodel:2.1.1-dev"
13
+ ],
14
+ "uri": "https://github.com/modelcloud/gptqmodel",
15
+ "damp_percent": 0.01,
16
+ "damp_auto_increment": 0.0025,
17
+ "static_groups": false,
18
+ "true_sequential": true,
19
+ "mse": 0.0
20
+ }
21
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|AUDIO|>",
6
+ "<|audio_bos|>",
7
+ "<|audio_eos|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_bos|>",
12
+ "<|vision_eos|>",
13
+ "<|vision_pad|>",
14
+ "<|IMAGE|>",
15
+ "<|VIDEO|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": "<|endoftext|>"
25
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8441917e39ae0244e06d704b95b3124795cec478e297f9afac39ba670d7e9d99
3
+ size 11421870
tokenizer_config.json ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "151646": {
29
+ "content": "<|AUDIO|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151647": {
37
+ "content": "<|audio_bos|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151648": {
45
+ "content": "<|audio_eos|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151649": {
53
+ "content": "<|box_end|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "151650": {
61
+ "content": "<|quad_start|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "151651": {
69
+ "content": "<|quad_end|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "151652": {
77
+ "content": "<|vision_bos|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "151653": {
85
+ "content": "<|vision_eos|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "151654": {
93
+ "content": "<|vision_pad|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "151655": {
101
+ "content": "<|IMAGE|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "151656": {
109
+ "content": "<|VIDEO|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "151657": {
117
+ "content": "<tool_call>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": false
123
+ },
124
+ "151658": {
125
+ "content": "</tool_call>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": false
131
+ },
132
+ "151659": {
133
+ "content": "<|fim_prefix|>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": false
139
+ },
140
+ "151660": {
141
+ "content": "<|fim_middle|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": false
147
+ },
148
+ "151661": {
149
+ "content": "<|fim_suffix|>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": false
155
+ },
156
+ "151662": {
157
+ "content": "<|fim_pad|>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": false
163
+ },
164
+ "151663": {
165
+ "content": "<|repo_name|>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": false
171
+ },
172
+ "151664": {
173
+ "content": "<|file_sep|>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": false
179
+ }
180
+ },
181
+ "additional_special_tokens": [
182
+ "<|im_start|>",
183
+ "<|im_end|>",
184
+ "<|AUDIO|>",
185
+ "<|audio_bos|>",
186
+ "<|audio_eos|>",
187
+ "<|box_end|>",
188
+ "<|quad_start|>",
189
+ "<|quad_end|>",
190
+ "<|vision_bos|>",
191
+ "<|vision_eos|>",
192
+ "<|vision_pad|>",
193
+ "<|IMAGE|>",
194
+ "<|VIDEO|>"
195
+ ],
196
+ "bos_token": null,
197
+ "chat_template": "{% set audio_count = namespace(value=0) %}{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_bos|><|IMAGE|><|vision_eos|>{% elif content['type'] == 'audio' or 'audio' in content or 'audio_url' in content %}{% set audio_count.value = audio_count.value + 1 %}{% if add_audio_id %}Audio {{ audio_count.value }}: {% endif %}<|audio_bos|><|AUDIO|><|audio_eos|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_bos|><|VIDEO|><|vision_eos|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 32768,
203
+ "pad_token": "<unk>",
204
+ "processor_class": "Qwen2_5OmniProcessor",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2TokenizerFast",
207
+ "unk_token": null,
208
+ "_commit_hash": null
209
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff