_base_ = './dino-4scale_r50_8xb2-12e_coco.py'
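
# Mixed-precision (FP16) training setting with a fixed loss scaling factor of 512.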
fp16 = dict(loss_scale=512.)
pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth'  # noqa
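# Use 5 feature levels instead of the 4 in the base config (hence the "5scale" variant).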
num_levels = 5
model = dict(
    num_feature_levels=num_levels,
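    # Replace the inherited ResNet-50 backbone with Swin-L pretrained on
    # ImageNet-22K at 384x384 (_delete_=True discards the base backbone
    # settings instead of merging into them).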
    backbone=dict(
        _delete_=True,
        type='SwinTransformer',
        pretrain_img_size=384,
        embed_dims=192,
        depths=[2, 2, 18, 2],
        num_heads=[6, 12, 24, 48],
        window_size=12,
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.2,
        patch_norm=True,
        out_indices=(0, 1, 2, 3),
        # Please only add indices that are actually used by the neck,
        # otherwise some parameters will not be used.
        with_cp=True,
        convert_weights=True,
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
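    # Swin-L outputs 192/384/768/1536 channels from its four stages; the neck
    # maps them to num_levels (5) feature maps for the transformer.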
    neck=dict(in_channels=[192, 384, 768, 1536], num_outs=num_levels),
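    # Keep the multi-scale deformable attention in the encoder self-attention
    # and decoder cross-attention consistent with the number of feature levels.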
    encoder=dict(layer_cfg=dict(self_attn_cfg=dict(num_levels=num_levels))),
    decoder=dict(layer_cfg=dict(cross_attn_cfg=dict(num_levels=num_levels))))