zerchen committed on
Commit
e01ca83
·
1 Parent(s): 717b269
pretrained_models/dataset_config.yaml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARCTIC-TRAIN:
2
+ TYPE: ImageDataset
3
+ URLS: wilor_training_data/dataset_tars/arctic-train/{000000..000176}.tar
4
+ epoch_size: 177000
5
+ BEDLAM-TRAIN:
6
+ TYPE: ImageDataset
7
+ URLS: wilor_training_data/dataset_tars/bedlam-train/{000000..000300}.tar
8
+ epoch_size: 301000
9
+ COCOW-TRAIN:
10
+ TYPE: ImageDataset
11
+ URLS: wilor_training_data/dataset_tars/cocow-train/{000000..000036}.tar
12
+ epoch_size: 78666
13
+ DEX-TRAIN:
14
+ TYPE: ImageDataset
15
+ URLS: wilor_training_data/dataset_tars/dex-train/{000000..000406}.tar
16
+ epoch_size: 406888
17
+ FREIHAND-MOCAP:
18
+ DATASET_FILE: wilor_training_data/freihand_mocap.npz
19
+ FREIHAND-TRAIN:
20
+ TYPE: ImageDataset
21
+ URLS: wilor_training_data/dataset_tars/freihand-train/{000000..000130}.tar
22
+ epoch_size: 130240
23
+ H2O3D-TRAIN:
24
+ TYPE: ImageDataset
25
+ URLS: wilor_training_data/dataset_tars/h2o3d-train/{000000..000060}.tar
26
+ epoch_size: 121996
27
+ HALPE-TRAIN:
28
+ TYPE: ImageDataset
29
+ URLS: wilor_training_data/dataset_tars/halpe-train/{000000..000022}.tar
30
+ epoch_size: 34289
31
+ HO3D-TRAIN:
32
+ TYPE: ImageDataset
33
+ URLS: wilor_training_data/dataset_tars/ho3d-train/{000000..000083}.tar
34
+ epoch_size: 83325
35
+ HOT3D-TRAIN:
36
+ TYPE: ImageDataset
37
+ URLS: wilor_training_data/dataset_tars/hot3d-train/{000000..000571}.tar
38
+ epoch_size: 572000
39
+ INTERHAND26M-TRAIN:
40
+ TYPE: ImageDataset
41
+ URLS: wilor_training_data/dataset_tars/interhand26m-train/{000000..001056}.tar
42
+ epoch_size: 1424632
43
+ MPIINZSL-TRAIN:
44
+ TYPE: ImageDataset
45
+ URLS: wilor_training_data/dataset_tars/mpiinzsl-train/{000000..000015}.tar
46
+ epoch_size: 15184
47
+ MTC-TRAIN:
48
+ TYPE: ImageDataset
49
+ URLS: wilor_training_data/dataset_tars/mtc-train/{000000..000306}.tar
50
+ epoch_size: 363947
51
+ REINTER-TRAIN:
52
+ TYPE: ImageDataset
53
+ URLS: wilor_training_data/dataset_tars/reinter-train/{000000..000418}.tar
54
+ epoch_size: 419000
55
+ RHD-TRAIN:
56
+ TYPE: ImageDataset
57
+ URLS: wilor_training_data/dataset_tars/rhd-train/{000000..000041}.tar
58
+ epoch_size: 61705
pretrained_models/model_config.yaml ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task_name: train
2
+ tags:
3
+ - dev
4
+ train: true
5
+ test: false
6
+ ckpt_path: null
7
+ seed: null
8
+ DATASETS:
9
+ TRAIN:
10
+ FREIHAND-TRAIN:
11
+ WEIGHT: 0.2
12
+ INTERHAND26M-TRAIN:
13
+ WEIGHT: 0.1
14
+ MTC-TRAIN:
15
+ WEIGHT: 0.05
16
+ RHD-TRAIN:
17
+ WEIGHT: 0.05
18
+ COCOW-TRAIN:
19
+ WEIGHT: 0.05
20
+ HALPE-TRAIN:
21
+ WEIGHT: 0.05
22
+ MPIINZSL-TRAIN:
23
+ WEIGHT: 0.05
24
+ HO3D-TRAIN:
25
+ WEIGHT: 0.05
26
+ H2O3D-TRAIN:
27
+ WEIGHT: 0.05
28
+ DEX-TRAIN:
29
+ WEIGHT: 0.05
30
+ BEDLAM-TRAIN:
31
+ WEIGHT: 0.05
32
+ REINTER-TRAIN:
33
+ WEIGHT: 0.1
34
+ HOT3D-TRAIN:
35
+ WEIGHT: 0.05
36
+ ARCTIC-TRAIN:
37
+ WEIGHT: 0.1
38
+ VAL:
39
+ FREIHAND-TRAIN:
40
+ WEIGHT: 1.0
41
+ MOCAP: FREIHAND-MOCAP
42
+ BETAS_REG: true
43
+ CONFIG:
44
+ SCALE_FACTOR: 0.3
45
+ ROT_FACTOR: 30
46
+ TRANS_FACTOR: 0.02
47
+ COLOR_SCALE: 0.2
48
+ ROT_AUG_RATE: 0.6
49
+ TRANS_AUG_RATE: 0.5
50
+ DO_FLIP: false
51
+ FLIP_AUG_RATE: 0.0
52
+ EXTREME_CROP_AUG_RATE: 0.0
53
+ EXTREME_CROP_AUG_LEVEL: 1
54
+ extras:
55
+ ignore_warnings: false
56
+ enforce_tags: true
57
+ print_config: true
58
+ exp_name: WiLoR
59
+ MANO:
60
+ DATA_DIR: mano_data
61
+ MODEL_PATH: ${MANO.DATA_DIR}/mano
62
+ GENDER: neutral
63
+ NUM_HAND_JOINTS: 15
64
+ MEAN_PARAMS: ${MANO.DATA_DIR}/mano_mean_params.npz
65
+ CREATE_BODY_POSE: false
66
+ EXTRA:
67
+ FOCAL_LENGTH: 5000
68
+ NUM_LOG_IMAGES: 4
69
+ NUM_LOG_SAMPLES_PER_IMAGE: 8
70
+ PELVIS_IND: 0
71
+ GENERAL:
72
+ TOTAL_STEPS: 1000000
73
+ LOG_STEPS: 1000
74
+ VAL_STEPS: 1000
75
+ CHECKPOINT_STEPS: 1000
76
+ CHECKPOINT_SAVE_TOP_K: 1
77
+ NUM_WORKERS: 8
78
+ PREFETCH_FACTOR: 2
79
+ TRAIN:
80
+ LR: 1.0e-05
81
+ WEIGHT_DECAY: 0.0001
82
+ BATCH_SIZE: 32
83
+ LOSS_REDUCTION: mean
84
+ NUM_TRAIN_SAMPLES: 2
85
+ NUM_TEST_SAMPLES: 64
86
+ POSE_2D_NOISE_RATIO: 0.01
87
+ SMPL_PARAM_NOISE_RATIO: 0.005
88
+ MODEL:
89
+ IMAGE_SIZE: 256
90
+ IMAGE_MEAN:
91
+ - 0.485
92
+ - 0.456
93
+ - 0.406
94
+ IMAGE_STD:
95
+ - 0.229
96
+ - 0.224
97
+ - 0.225
98
+ BACKBONE:
99
+ TYPE: vit
100
+ PRETRAINED_WEIGHTS: training_data/vitpose_backbone.pth
101
+ MANO_HEAD:
102
+ TYPE: transformer_decoder
103
+ IN_CHANNELS: 2048
104
+ TRANSFORMER_DECODER:
105
+ depth: 6
106
+ heads: 8
107
+ mlp_dim: 1024
108
+ dim_head: 64
109
+ dropout: 0.0
110
+ emb_dropout: 0.0
111
+ norm: layer
112
+ context_dim: 1280
113
+ LOSS_WEIGHTS:
114
+ KEYPOINTS_3D: 0.05
115
+ KEYPOINTS_2D: 0.01
116
+ GLOBAL_ORIENT: 0.001
117
+ HAND_POSE: 0.001
118
+ BETAS: 0.0005
119
+ ADVERSARIAL: 0.0005