# Page-header residue from the file listing (not part of the config):
# yeliudev -- "Add files" -- bc120ce
_base_ = ['models']

# Model settings.
# Every nested mapping below carries a 'type' key together with the keyword
# settings that accompany it. Key order matches the original layout.
model = {
    'type': 'R2Tuning',
    'arch': 'ViT-B/32',
    'init': False,
    'dims': 256,
    'strides': (1, 2, 4, 8),
    'buffer_size': 1024,
    'max_num_moment': 50,
    # Adapter block settings, including its positional-encoding and
    # transformer-layer sub-configs.
    'adapter_cfg': {
        'type': 'R2Block',
        'k': 4,
        'dropout': 0.5,
        'use_tef': True,
        'pos_cfg': {
            'type': 'PositionalEncoding',
            'normalize': True,
            'max_len': 1024,
        },
        'tem_cfg': {
            'type': 'TransformerDecoderLayer',
            'heads': 8,
            'ratio': 4,
            # All four attention/FFN dropout rates are zero; only droppath
            # is non-zero in this layer config.
            'att_dropout': 0.0,
            'ffn_dropout': 0.0,
            'att_out_dropout': 0.0,
            'ffn_out_dropout': 0.0,
            'droppath': 0.1,
            'pre_norm': False,
            'bias': True,
            'norm_cfg': {'type': 'LN'},
            'act_cfg': {'type': 'ReLU', 'inplace': True},
            'order': ('cross_att', 'self_att', 'ffn'),
            'att_init_cfg': {'type': 'xavier', 'distribution': 'uniform'},
            'ffn_init_cfg': {'type': 'kaiming'},
        },
    },
    'pyramid_cfg': {'type': 'ConvPyramid'},
    'pooling_cfg': {'type': 'AdaPooling'},
    # NOTE(review): the key is spelled 'kernal_size' (not 'kernel_size').
    # Presumably it has to match the ConvHead constructor argument, which is
    # not visible from this file -- confirm before renaming.
    'class_head_cfg': {'type': 'ConvHead', 'kernal_size': 3},
    'coord_head_cfg': {'type': 'ConvHead', 'kernal_size': 3},
    # Loss bundle: one entry per loss term, each with its own loss_weight.
    'loss_cfg': {
        'type': 'BundleLoss',
        'sample_radius': 1.5,
        'loss_cls': {'type': 'FocalLoss', 'loss_weight': 1.0},
        'loss_reg': {'type': 'L1Loss', 'loss_weight': 0.2},
        'loss_sal': {'type': 'SampledNCELoss', 'loss_weight': 0.1},
        'loss_video_cal': {'type': 'InfoNCELoss', 'loss_weight': 0.1},
        'loss_layer_cal': {'type': 'InfoNCELoss', 'loss_weight': 0.1},
    },
}