scfive committed
Commit e8f2571 · 1 Parent(s): 0eabbc1

Resolve README.md conflict and continue rebase

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50):
  1. .idea/.gitignore +3 -0
  2. .idea/SpecDETR.iml +18 -0
  3. .idea/inspectionProfiles/Project_Default.xml +42 -0
  4. .idea/inspectionProfiles/profiles_settings.xml +6 -0
  5. .idea/misc.xml +4 -0
  6. .idea/modules.xml +8 -0
  7. MANIFEST.in +6 -0
  8. benchmark.py +133 -0
  9. configs/_base_/datasets/cityscapes_detection.py +84 -0
  10. configs/_base_/datasets/cityscapes_instance.py +113 -0
  11. configs/_base_/datasets/coco_detection.py +95 -0
  12. configs/_base_/datasets/coco_instance.py +95 -0
  13. configs/_base_/datasets/coco_instance_semantic.py +78 -0
  14. configs/_base_/datasets/coco_panoptic.py +94 -0
  15. configs/_base_/datasets/deepfashion.py +95 -0
  16. configs/_base_/datasets/hsi_detection.py +96 -0
  17. configs/_base_/datasets/objects365v1_detection.py +74 -0
  18. configs/_base_/datasets/objects365v2_detection.py +73 -0
  19. configs/_base_/datasets/openimages_detection.py +81 -0
  20. configs/_base_/datasets/semi_coco_detection.py +178 -0
  21. configs/_base_/datasets/voc0712.py +92 -0
  22. configs/_base_/datasets/wider_face.py +73 -0
  23. configs/_base_/default_runtime.py +25 -0
  24. configs/_base_/models/cascade-mask-rcnn_r50_fpn.py +203 -0
  25. configs/_base_/models/cascade-rcnn_r50_fpn.py +185 -0
  26. configs/_base_/models/fast-rcnn_r50_fpn.py +68 -0
  27. configs/_base_/models/faster-rcnn_r50-caffe-c4.py +123 -0
  28. configs/_base_/models/faster-rcnn_r50-caffe-dc5.py +111 -0
  29. configs/_base_/models/faster-rcnn_r50_fpn.py +114 -0
  30. configs/_base_/models/mask-rcnn_r50-caffe-c4.py +132 -0
  31. configs/_base_/models/mask-rcnn_r50_fpn.py +127 -0
  32. configs/_base_/models/retinanet_r50_fpn.py +68 -0
  33. configs/_base_/models/rpn_r50-caffe-c4.py +64 -0
  34. configs/_base_/models/rpn_r50_fpn.py +64 -0
  35. configs/_base_/models/ssd300.py +63 -0
  36. configs/_base_/schedules/schedule_1x.py +28 -0
  37. configs/_base_/schedules/schedule_20e.py +28 -0
  38. configs/_base_/schedules/schedule_2x.py +28 -0
  39. configs/backup/albu_example/README.md +31 -0
  40. configs/backup/albu_example/mask-rcnn_r50_fpn_albu-1x_coco.py +66 -0
  41. configs/backup/albu_example/metafile.yml +17 -0
  42. configs/backup/atss/README.md +31 -0
  43. configs/backup/atss/atss_r101_fpn_1x_coco.py +6 -0
  44. configs/backup/atss/atss_r101_fpn_8xb8-amp-lsj-200e_coco.py +7 -0
  45. configs/backup/atss/atss_r18_fpn_8xb8-amp-lsj-200e_coco.py +7 -0
  46. configs/backup/atss/atss_r50_fpn_1x_coco.py +71 -0
  47. configs/backup/atss/atss_r50_fpn_8xb8-amp-lsj-200e_coco.py +81 -0
  48. configs/backup/atss/metafile.yml +60 -0
  49. configs/backup/autoassign/README.md +35 -0
  50. configs/backup/autoassign/autoassign_r50-caffe_fpn_1x_coco.py +69 -0
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
+ # Default ignored files
+ /shelf/
+ /workspace.xml
.idea/SpecDETR.iml ADDED
@@ -0,0 +1,18 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <module type="PYTHON_MODULE" version="4">
+   <component name="NewModuleRootManager">
+     <content url="file://$MODULE_DIR$" />
+     <orderEntry type="jdk" jdkName="mmcv2" jdkType="Python SDK" />
+     <orderEntry type="sourceFolder" forTests="false" />
+   </component>
+   <component name="PackageRequirementsSettings">
+     <option name="requirementsPath" value="" />
+   </component>
+   <component name="PyDocumentationSettings">
+     <option name="format" value="GOOGLE" />
+     <option name="myDocStringFormat" value="Google" />
+   </component>
+   <component name="TestRunnerService">
+     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+   </component>
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,42 @@
+ <component name="InspectionProjectProfileManager">
+   <profile version="1.0">
+     <option name="myName" value="Project Default" />
+     <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+       <option name="ignoredPackages">
+         <value>
+           <list size="22">
+             <item index="0" class="java.lang.String" itemvalue="imagecorruptions" />
+             <item index="1" class="java.lang.String" itemvalue="interrogate" />
+             <item index="2" class="java.lang.String" itemvalue="mmtrack" />
+             <item index="3" class="java.lang.String" itemvalue="isort" />
+             <item index="4" class="java.lang.String" itemvalue="kwarray" />
+             <item index="5" class="java.lang.String" itemvalue="asynctest" />
+             <item index="6" class="java.lang.String" itemvalue="onnx" />
+             <item index="7" class="java.lang.String" itemvalue="xdoctest" />
+             <item index="8" class="java.lang.String" itemvalue="codecov" />
+             <item index="9" class="java.lang.String" itemvalue="flake8" />
+             <item index="10" class="java.lang.String" itemvalue="ubelt" />
+             <item index="11" class="java.lang.String" itemvalue="fairscale" />
+             <item index="12" class="java.lang.String" itemvalue="pytest" />
+             <item index="13" class="java.lang.String" itemvalue="emoji" />
+             <item index="14" class="java.lang.String" itemvalue="lightning" />
+             <item index="15" class="java.lang.String" itemvalue="hydra-core" />
+             <item index="16" class="java.lang.String" itemvalue="memory_profiler" />
+             <item index="17" class="java.lang.String" itemvalue="mmpose" />
+             <item index="18" class="java.lang.String" itemvalue="mmrazor" />
+             <item index="19" class="java.lang.String" itemvalue="parameterized" />
+             <item index="20" class="java.lang.String" itemvalue="mmcls" />
+             <item index="21" class="java.lang.String" itemvalue="mmrotate" />
+           </list>
+         </value>
+       </option>
+     </inspection_tool>
+     <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
+       <option name="ignoredIdentifiers">
+         <list>
+           <option value="pkl2json._" />
+         </list>
+       </option>
+     </inspection_tool>
+   </profile>
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
+ <component name="InspectionProjectProfileManager">
+   <settings>
+     <option name="USE_PROJECT_PROFILE" value="false" />
+     <version value="1.0" />
+   </settings>
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="ProjectRootManager" version="2" project-jdk-name="mmcv2" project-jdk-type="Python SDK" />
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="ProjectModuleManager">
+     <modules>
+       <module fileurl="file://$PROJECT_DIR$/.idea/SpecDETR.iml" filepath="$PROJECT_DIR$/.idea/SpecDETR.iml" />
+     </modules>
+   </component>
+ </project>
MANIFEST.in ADDED
@@ -0,0 +1,6 @@
+ include requirements/*.txt
+ include mmdet/VERSION
+ include mmdet/.mim/model-index.yml
+ include mmdet/.mim/demo/*/*
+ recursive-include mmdet/.mim/configs *.py *.yml
+ recursive-include mmdet/.mim/tools *.sh *.py
benchmark.py ADDED
@@ -0,0 +1,133 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ import argparse
+ import os
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+ from mmengine import MMLogger
+ from mmengine.config import Config, DictAction
+ from mmengine.dist import init_dist
+ from mmengine.registry import init_default_scope
+ from mmengine.utils import mkdir_or_exist
+
+ from mmdet.utils.benchmark import (DataLoaderBenchmark, DatasetBenchmark,
+                                    InferenceBenchmark)
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description='MMDet benchmark')
+     parser.add_argument('--config', default='./configs/specdetr_sb-2s-100e_hsi.py', help='test config file path')
+     parser.add_argument('--checkpoint', default='./work_dirs/SpecDETR/SpecDETR_100e.pth', help='checkpoint file')
+     parser.add_argument(
+         '--task',
+         choices=['inference', 'dataloader', 'dataset'],
+         default='inference',
+         help='which task to benchmark')
+     parser.add_argument(
+         '--repeat-num',
+         type=int,
+         default=1,
+         help='number of repeat times of measurement for averaging the results')
+     parser.add_argument(
+         '--max-iter', type=int, default=2000, help='num of max iter')
+     parser.add_argument(
+         '--log-interval', type=int, default=50, help='interval of logging')
+     parser.add_argument(
+         '--num-warmup', type=int, default=5, help='Number of warmup')
+     parser.add_argument(
+         '--fuse-conv-bn',
+         action='store_true',
+         help='Whether to fuse conv and bn, this will slightly increase '
+         'the inference speed')
+     parser.add_argument(
+         '--dataset-type',
+         choices=['train', 'val', 'test'],
+         default='test',
+         help='Benchmark dataset type. Only supports train, val and test')
+     parser.add_argument(
+         '--work-dir',
+         help='the directory to save the file containing '
+         'benchmark metrics')
+     parser.add_argument(
+         '--cfg-options',
+         nargs='+',
+         action=DictAction,
+         help='override some settings in the used config, the key-value pair '
+         'in xxx=yyy format will be merged into config file. If the value to '
+         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
+         'Note that the quotation marks are necessary and that no white space '
+         'is allowed.')
+     parser.add_argument(
+         '--launcher',
+         choices=['none', 'pytorch', 'slurm', 'mpi'],
+         default='none',
+         help='job launcher')
+     parser.add_argument('--local_rank', type=int, default=0)
+     args = parser.parse_args()
+     if 'LOCAL_RANK' not in os.environ:
+         os.environ['LOCAL_RANK'] = str(args.local_rank)
+     return args
+
+
+ def inference_benchmark(args, cfg, distributed, logger):
+     benchmark = InferenceBenchmark(
+         cfg,
+         args.checkpoint,
+         distributed,
+         args.fuse_conv_bn,
+         args.max_iter,
+         args.log_interval,
+         args.num_warmup,
+         logger=logger)
+     return benchmark
+
+
+ def dataloader_benchmark(args, cfg, distributed, logger):
+     benchmark = DataLoaderBenchmark(
+         cfg,
+         distributed,
+         args.dataset_type,
+         args.max_iter,
+         args.log_interval,
+         args.num_warmup,
+         logger=logger)
+     return benchmark
+
+
+ def dataset_benchmark(args, cfg, distributed, logger):
+     benchmark = DatasetBenchmark(
+         cfg,
+         args.dataset_type,
+         args.max_iter,
+         args.log_interval,
+         args.num_warmup,
+         logger=logger)
+     return benchmark
+
+
+ def main():
+     args = parse_args()
+     cfg = Config.fromfile(args.config)
+     if args.cfg_options is not None:
+         cfg.merge_from_dict(args.cfg_options)
+
+     init_default_scope(cfg.get('default_scope', 'mmdet'))
+
+     distributed = False
+     if args.launcher != 'none':
+         init_dist(args.launcher, **cfg.get('env_cfg', {}).get('dist_cfg', {}))
+         distributed = True
+
+     log_file = None
+     if args.work_dir:
+         log_file = os.path.join(args.work_dir, 'benchmark.log')
+         mkdir_or_exist(args.work_dir)
+
+     logger = MMLogger.get_instance(
+         'mmdet', log_file=log_file, log_level='INFO')
+
+     benchmark = eval(f'{args.task}_benchmark')(args, cfg, distributed, logger)
+     benchmark.run(args.repeat_num)
+
+
+ if __name__ == '__main__':
+     main()
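
The script resolves the benchmark factory with eval(f'{args.task}_benchmark'), which is safe only because argparse restricts --task to three known choices. A dictionary dispatch expresses the same mapping without eval; a minimal sketch using only the factory functions defined above:

    # Sketch: dict-based dispatch instead of eval(f'{args.task}_benchmark').
    TASK_BENCHMARKS = {
        'inference': inference_benchmark,
        'dataloader': dataloader_benchmark,
        'dataset': dataset_benchmark,
    }

    def build_benchmark(args, cfg, distributed, logger):
        # argparse already limits --task to these three keys,
        # so the lookup cannot raise KeyError.
        return TASK_BENCHMARKS[args.task](args, cfg, distributed, logger)

The table makes the task-to-class mapping explicit and greppable, at no cost in behavior.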
configs/_base_/datasets/cityscapes_detection.py ADDED
@@ -0,0 +1,84 @@
+ # dataset settings
+ dataset_type = 'CityscapesDataset'
+ data_root = 'data/cityscapes/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/segmentation/cityscapes/'
+
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/segmentation/',
+ #         'data/': 's3://openmmlab/datasets/segmentation/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='LoadAnnotations', with_bbox=True),
+     dict(
+         type='RandomResize',
+         scale=[(2048, 800), (2048, 1024)],
+         keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PackDetInputs')
+ ]
+
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+     # If you don't have a gt annotation, delete the pipeline
+     dict(type='LoadAnnotations', with_bbox=True),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor'))
+ ]
+
+ train_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     batch_sampler=dict(type='AspectRatioBatchSampler'),
+     dataset=dict(
+         type='RepeatDataset',
+         times=8,
+         dataset=dict(
+             type=dataset_type,
+             data_root=data_root,
+             ann_file='annotations/instancesonly_filtered_gtFine_train.json',
+             data_prefix=dict(img='leftImg8bit/train/'),
+             filter_cfg=dict(filter_empty_gt=True, min_size=32),
+             pipeline=train_pipeline,
+             backend_args=backend_args)))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/instancesonly_filtered_gtFine_val.json',
+         data_prefix=dict(img='leftImg8bit/val/'),
+         test_mode=True,
+         filter_cfg=dict(filter_empty_gt=True, min_size=32),
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='CocoMetric',
+     ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_val.json',
+     metric='bbox',
+     backend_args=backend_args)
+
+ test_evaluator = val_evaluator
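
These _base_ dataset files are not run directly; a top-level config composes them through _base_ inheritance and overrides individual keys. A minimal sketch of such a derived config (the file name and the override value below are illustrative, not part of this commit; the four base files it references are all added by it):

    # hypothetical top-level config, e.g. configs/my_faster-rcnn_cityscapes.py
    _base_ = [
        '../_base_/models/faster-rcnn_r50_fpn.py',
        '../_base_/datasets/cityscapes_detection.py',
        '../_base_/schedules/schedule_1x.py',
        '../_base_/default_runtime.py',
    ]

    # MMEngine merges dicts recursively, so this overrides only batch_size
    # inside the inherited train_dataloader definition.
    train_dataloader = dict(batch_size=2)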
configs/_base_/datasets/cityscapes_instance.py ADDED
@@ -0,0 +1,113 @@
+ # dataset settings
+ dataset_type = 'CityscapesDataset'
+ data_root = 'data/cityscapes/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/segmentation/cityscapes/'
+
+ # Method 2: Use backend_args, file_client_args in versions before 3.0.0rc6
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/segmentation/',
+ #         'data/': 's3://openmmlab/datasets/segmentation/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+     dict(
+         type='RandomResize',
+         scale=[(2048, 800), (2048, 1024)],
+         keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PackDetInputs')
+ ]
+
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+     # If you don't have a gt annotation, delete the pipeline
+     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor'))
+ ]
+
+ train_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     batch_sampler=dict(type='AspectRatioBatchSampler'),
+     dataset=dict(
+         type='RepeatDataset',
+         times=8,
+         dataset=dict(
+             type=dataset_type,
+             data_root=data_root,
+             ann_file='annotations/instancesonly_filtered_gtFine_train.json',
+             data_prefix=dict(img='leftImg8bit/train/'),
+             filter_cfg=dict(filter_empty_gt=True, min_size=32),
+             pipeline=train_pipeline,
+             backend_args=backend_args)))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/instancesonly_filtered_gtFine_val.json',
+         data_prefix=dict(img='leftImg8bit/val/'),
+         test_mode=True,
+         filter_cfg=dict(filter_empty_gt=True, min_size=32),
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+
+ test_dataloader = val_dataloader
+
+ val_evaluator = [
+     dict(
+         type='CocoMetric',
+         ann_file=data_root +
+         'annotations/instancesonly_filtered_gtFine_val.json',
+         metric=['bbox', 'segm'],
+         backend_args=backend_args),
+     dict(
+         type='CityScapesMetric',
+         seg_prefix=data_root + 'gtFine/val',
+         outfile_prefix='./work_dirs/cityscapes_metric/instance',
+         backend_args=backend_args)
+ ]
+
+ test_evaluator = val_evaluator
+
+ # inference on test dataset and
+ # format the output results for submission.
+ # test_dataloader = dict(
+ #     batch_size=1,
+ #     num_workers=2,
+ #     persistent_workers=True,
+ #     drop_last=False,
+ #     sampler=dict(type='DefaultSampler', shuffle=False),
+ #     dataset=dict(
+ #         type=dataset_type,
+ #         data_root=data_root,
+ #         ann_file='annotations/instancesonly_filtered_gtFine_test.json',
+ #         data_prefix=dict(img='leftImg8bit/test/'),
+ #         test_mode=True,
+ #         filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ #         pipeline=test_pipeline))
+ # test_evaluator = dict(
+ #     type='CityScapesMetric',
+ #     format_only=True,
+ #     outfile_prefix='./work_dirs/cityscapes_metric/test')
configs/_base_/datasets/coco_detection.py ADDED
@@ -0,0 +1,95 @@
+ # dataset settings
+ dataset_type = 'CocoDataset'
+ data_root = 'data/coco/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
+
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection/',
+ #         'data/': 's3://openmmlab/datasets/detection/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='LoadAnnotations', with_bbox=True),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PackDetInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     # If you don't have a gt annotation, delete the pipeline
+     dict(type='LoadAnnotations', with_bbox=True),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor'))
+ ]
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     batch_sampler=dict(type='AspectRatioBatchSampler'),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/instances_train2017.json',
+         data_prefix=dict(img='train2017/'),
+         filter_cfg=dict(filter_empty_gt=True, min_size=32),
+         pipeline=train_pipeline,
+         backend_args=backend_args))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/instances_val2017.json',
+         data_prefix=dict(img='val2017/'),
+         test_mode=True,
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='CocoMetric',
+     ann_file=data_root + 'annotations/instances_val2017.json',
+     metric='bbox',
+     format_only=False,
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
+
+ # inference on test dataset and
+ # format the output results for submission.
+ # test_dataloader = dict(
+ #     batch_size=1,
+ #     num_workers=2,
+ #     persistent_workers=True,
+ #     drop_last=False,
+ #     sampler=dict(type='DefaultSampler', shuffle=False),
+ #     dataset=dict(
+ #         type=dataset_type,
+ #         data_root=data_root,
+ #         ann_file=data_root + 'annotations/image_info_test-dev2017.json',
+ #         data_prefix=dict(img='test2017/'),
+ #         test_mode=True,
+ #         pipeline=test_pipeline))
+ # test_evaluator = dict(
+ #     type='CocoMetric',
+ #     metric='bbox',
+ #     format_only=True,
+ #     ann_file=data_root + 'annotations/image_info_test-dev2017.json',
+ #     outfile_prefix='./work_dirs/coco_detection/test')
configs/_base_/datasets/coco_instance.py ADDED
@@ -0,0 +1,95 @@
+ # dataset settings
+ dataset_type = 'CocoDataset'
+ data_root = 'data/coco/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
+
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection/',
+ #         'data/': 's3://openmmlab/datasets/detection/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PackDetInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     # If you don't have a gt annotation, delete the pipeline
+     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor'))
+ ]
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     batch_sampler=dict(type='AspectRatioBatchSampler'),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/instances_train2017.json',
+         data_prefix=dict(img='train2017/'),
+         filter_cfg=dict(filter_empty_gt=True, min_size=32),
+         pipeline=train_pipeline,
+         backend_args=backend_args))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/instances_val2017.json',
+         data_prefix=dict(img='val2017/'),
+         test_mode=True,
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='CocoMetric',
+     ann_file=data_root + 'annotations/instances_val2017.json',
+     metric=['bbox', 'segm'],
+     format_only=False,
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
+
+ # inference on test dataset and
+ # format the output results for submission.
+ # test_dataloader = dict(
+ #     batch_size=1,
+ #     num_workers=2,
+ #     persistent_workers=True,
+ #     drop_last=False,
+ #     sampler=dict(type='DefaultSampler', shuffle=False),
+ #     dataset=dict(
+ #         type=dataset_type,
+ #         data_root=data_root,
+ #         ann_file=data_root + 'annotations/image_info_test-dev2017.json',
+ #         data_prefix=dict(img='test2017/'),
+ #         test_mode=True,
+ #         pipeline=test_pipeline))
+ # test_evaluator = dict(
+ #     type='CocoMetric',
+ #     metric=['bbox', 'segm'],
+ #     format_only=True,
+ #     ann_file=data_root + 'annotations/image_info_test-dev2017.json',
+ #     outfile_prefix='./work_dirs/coco_instance/test')
configs/_base_/datasets/coco_instance_semantic.py ADDED
@@ -0,0 +1,78 @@
+ # dataset settings
+ dataset_type = 'CocoDataset'
+ data_root = 'data/coco/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
+
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection/',
+ #         'data/': 's3://openmmlab/datasets/detection/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(
+         type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PackDetInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     # If you don't have a gt annotation, delete the pipeline
+     dict(
+         type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor'))
+ ]
+
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     batch_sampler=dict(type='AspectRatioBatchSampler'),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/instances_train2017.json',
+         data_prefix=dict(img='train2017/', seg='stuffthingmaps/train2017/'),
+         filter_cfg=dict(filter_empty_gt=True, min_size=32),
+         pipeline=train_pipeline,
+         backend_args=backend_args))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/instances_val2017.json',
+         data_prefix=dict(img='val2017/'),
+         test_mode=True,
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='CocoMetric',
+     ann_file=data_root + 'annotations/instances_val2017.json',
+     metric=['bbox', 'segm'],
+     format_only=False,
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
configs/_base_/datasets/coco_panoptic.py ADDED
@@ -0,0 +1,94 @@
+ # dataset settings
+ dataset_type = 'CocoPanopticDataset'
+ data_root = 'data/coco/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
+
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection/',
+ #         'data/': 's3://openmmlab/datasets/detection/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='LoadPanopticAnnotations', backend_args=backend_args),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PackDetInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     dict(type='LoadPanopticAnnotations', backend_args=backend_args),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor'))
+ ]
+
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     batch_sampler=dict(type='AspectRatioBatchSampler'),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/panoptic_train2017.json',
+         data_prefix=dict(
+             img='train2017/', seg='annotations/panoptic_train2017/'),
+         filter_cfg=dict(filter_empty_gt=True, min_size=32),
+         pipeline=train_pipeline,
+         backend_args=backend_args))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/panoptic_val2017.json',
+         data_prefix=dict(img='val2017/', seg='annotations/panoptic_val2017/'),
+         test_mode=True,
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='CocoPanopticMetric',
+     ann_file=data_root + 'annotations/panoptic_val2017.json',
+     seg_prefix=data_root + 'annotations/panoptic_val2017/',
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
+
+ # inference on test dataset and
+ # format the output results for submission.
+ # test_dataloader = dict(
+ #     batch_size=1,
+ #     num_workers=1,
+ #     persistent_workers=True,
+ #     drop_last=False,
+ #     sampler=dict(type='DefaultSampler', shuffle=False),
+ #     dataset=dict(
+ #         type=dataset_type,
+ #         data_root=data_root,
+ #         ann_file='annotations/panoptic_image_info_test-dev2017.json',
+ #         data_prefix=dict(img='test2017/'),
+ #         test_mode=True,
+ #         pipeline=test_pipeline))
+ # test_evaluator = dict(
+ #     type='CocoPanopticMetric',
+ #     format_only=True,
+ #     ann_file=data_root + 'annotations/panoptic_image_info_test-dev2017.json',
+ #     outfile_prefix='./work_dirs/coco_panoptic/test')
configs/_base_/datasets/deepfashion.py ADDED
@@ -0,0 +1,95 @@
+ # dataset settings
+ dataset_type = 'DeepFashionDataset'
+ data_root = 'data/DeepFashion/In-shop/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
+
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection/',
+ #         'data/': 's3://openmmlab/datasets/detection/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+     dict(type='Resize', scale=(750, 1101), keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PackDetInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='Resize', scale=(750, 1101), keep_ratio=True),
+     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor'))
+ ]
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     batch_sampler=dict(type='AspectRatioBatchSampler'),
+     dataset=dict(
+         type='RepeatDataset',
+         times=2,
+         dataset=dict(
+             type=dataset_type,
+             data_root=data_root,
+             ann_file='Anno/segmentation/DeepFashion_segmentation_train.json',
+             data_prefix=dict(img='Img/'),
+             filter_cfg=dict(filter_empty_gt=True, min_size=32),
+             pipeline=train_pipeline,
+             backend_args=backend_args)))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='Anno/segmentation/DeepFashion_segmentation_query.json',
+         data_prefix=dict(img='Img/'),
+         test_mode=True,
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='Anno/segmentation/DeepFashion_segmentation_gallery.json',
+         data_prefix=dict(img='Img/'),
+         test_mode=True,
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+
+ val_evaluator = dict(
+     type='CocoMetric',
+     ann_file=data_root +
+     'Anno/segmentation/DeepFashion_segmentation_query.json',
+     metric=['bbox', 'segm'],
+     format_only=False,
+     backend_args=backend_args)
+ test_evaluator = dict(
+     type='CocoMetric',
+     ann_file=data_root +
+     'Anno/segmentation/DeepFashion_segmentation_gallery.json',
+     metric=['bbox', 'segm'],
+     format_only=False,
+     backend_args=backend_args)
configs/_base_/datasets/hsi_detection.py ADDED
@@ -0,0 +1,96 @@
+ # dataset settings
+ dataset_type = 'HSIDataset'
+ data_root = '/media/ubuntu/data/HTD_dataset/SPOD_30b_8c/'
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection/',
+ #         'data/': 's3://openmmlab/datasets/detection/'
+ #     }))
+
+ normalized_basis = 3000
+ backend_args = None
+ train_pipeline = [
+     dict(
+         type='LoadHyperspectralImageFromFiles',
+         to_float32=True,
+         normalized_basis=normalized_basis),
+     dict(type='LoadAnnotations', with_bbox=True),
+     # dict(type='Resize', scale=(512, 512), keep_ratio=True),
+     dict(type='HSIResize', scale_factor=1, keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
+                    'flip_direction', 'scale_factor'))
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadHyperspectralImageFromFiles',
+         to_float32=True,
+         normalized_basis=normalized_basis),
+     # dict(type='Resize', scale=(512, 512), keep_ratio=True),
+     dict(type='HSIResize', scale_factor=1, keep_ratio=True),
+     # If you don't have a gt annotation, delete the pipeline
+     dict(type='LoadAnnotations', with_bbox=True),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor'))
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     batch_sampler=dict(type='AspectRatioBatchSampler'),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/train.json',
+         data_prefix=dict(img='train/'),
+         filter_cfg=dict(filter_empty_gt=True, min_size=32),
+         pipeline=train_pipeline,
+         backend_args=backend_args))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/test.json',
+         data_prefix=dict(img='test/'),
+         test_mode=True,
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='CocoMetric',
+     ann_file=data_root + 'annotations/test.json',
+     metric=['bbox', 'proposal_fast'],
+     classwise=True,
+     format_only=False,
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
+
+ # inference on test dataset and
+ # format the output results for submission.
+ # test_dataloader = dict(
+ #     batch_size=1,
+ #     num_workers=2,
+ #     persistent_workers=True,
+ #     drop_last=False,
+ #     sampler=dict(type='DefaultSampler', shuffle=False),
+ #     dataset=dict(
+ #         type=dataset_type,
+ #         data_root=data_root,
+ #         ann_file=data_root + 'annotations/image_info_test-dev2017.json',
+ #         data_prefix=dict(img='test2017/'),
+ #         test_mode=True,
+ #         pipeline=test_pipeline))
+ # test_evaluator = dict(
+ #     type='CocoMetric',
+ #     metric='bbox',
+ #     format_only=True,
+ #     ann_file=data_root + 'annotations/image_info_test-dev2017.json',
+ #     outfile_prefix='./work_dirs/coco_detection/test')
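
`LoadHyperspectralImageFromFiles` and `HSIResize` are SpecDETR's custom transforms for hyperspectral cubes, and `normalized_basis=3000` suggests raw digital numbers are scaled by a fixed divisor at load time. A minimal sketch of that idea, assuming the cube is stored as a NumPy array of shape (H, W, bands) and the basis is a plain divisor; the repo's actual transform may differ:

    import numpy as np

    class LoadHyperspectralSketch:
        # Hypothetical stand-in for LoadHyperspectralImageFromFiles.
        def __init__(self, to_float32=True, normalized_basis=3000):
            self.to_float32 = to_float32
            self.normalized_basis = normalized_basis

        def __call__(self, results):
            cube = np.load(results['img_path'])  # (H, W, bands), raw DNs
            if self.to_float32:
                cube = cube.astype(np.float32)
            cube = cube / self.normalized_basis  # scale raw values toward [0, 1]
            results['img'] = cube
            results['img_shape'] = cube.shape[:2]
            results['ori_shape'] = cube.shape[:2]
            return results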
configs/_base_/datasets/objects365v1_detection.py ADDED
@@ -0,0 +1,74 @@
+ # dataset settings
+ dataset_type = 'Objects365V1Dataset'
+ data_root = 'data/Objects365/Obj365_v1/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
+
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection/',
+ #         'data/': 's3://openmmlab/datasets/detection/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='LoadAnnotations', with_bbox=True),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PackDetInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     # If you don't have a gt annotation, delete the pipeline
+     dict(type='LoadAnnotations', with_bbox=True),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor'))
+ ]
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     batch_sampler=dict(type='AspectRatioBatchSampler'),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/objects365_train.json',
+         data_prefix=dict(img='train/'),
+         filter_cfg=dict(filter_empty_gt=True, min_size=32),
+         pipeline=train_pipeline,
+         backend_args=backend_args))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/objects365_val.json',
+         data_prefix=dict(img='val/'),
+         test_mode=True,
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='CocoMetric',
+     ann_file=data_root + 'annotations/objects365_val.json',
+     metric='bbox',
+     sort_categories=True,
+     format_only=False,
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
configs/_base_/datasets/objects365v2_detection.py ADDED
@@ -0,0 +1,73 @@
+ # dataset settings
+ dataset_type = 'Objects365V2Dataset'
+ data_root = 'data/Objects365/Obj365_v2/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
+
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection/',
+ #         'data/': 's3://openmmlab/datasets/detection/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='LoadAnnotations', with_bbox=True),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PackDetInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     # If you don't have a gt annotation, delete the pipeline
+     dict(type='LoadAnnotations', with_bbox=True),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor'))
+ ]
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     batch_sampler=dict(type='AspectRatioBatchSampler'),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/zhiyuan_objv2_train.json',
+         data_prefix=dict(img='train/'),
+         filter_cfg=dict(filter_empty_gt=True, min_size=32),
+         pipeline=train_pipeline,
+         backend_args=backend_args))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/zhiyuan_objv2_val.json',
+         data_prefix=dict(img='val/'),
+         test_mode=True,
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='CocoMetric',
+     ann_file=data_root + 'annotations/zhiyuan_objv2_val.json',
+     metric='bbox',
+     format_only=False,
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
configs/_base_/datasets/openimages_detection.py ADDED
@@ -0,0 +1,81 @@
+ # dataset settings
+ dataset_type = 'OpenImagesDataset'
+ data_root = 'data/OpenImages/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
+
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection/',
+ #         'data/': 's3://openmmlab/datasets/detection/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='LoadAnnotations', with_bbox=True),
+     dict(type='Resize', scale=(1024, 800), keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PackDetInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='Resize', scale=(1024, 800), keep_ratio=True),
+     # avoid bboxes being resized
+     dict(type='LoadAnnotations', with_bbox=True),
+     # TODO: find a better way to collect image_level_labels
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor', 'instances', 'image_level_labels'))
+ ]
+
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=0,  # workers_per_gpu > 0 may cause out-of-memory errors
+     persistent_workers=False,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     batch_sampler=dict(type='AspectRatioBatchSampler'),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/oidv6-train-annotations-bbox.csv',
+         data_prefix=dict(img='OpenImages/train/'),
+         label_file='annotations/class-descriptions-boxable.csv',
+         hierarchy_file='annotations/bbox_labels_600_hierarchy.json',
+         meta_file='annotations/train-image-metas.pkl',
+         pipeline=train_pipeline,
+         backend_args=backend_args))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=0,
+     persistent_workers=False,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/validation-annotations-bbox.csv',
+         data_prefix=dict(img='OpenImages/validation/'),
+         label_file='annotations/class-descriptions-boxable.csv',
+         hierarchy_file='annotations/bbox_labels_600_hierarchy.json',
+         meta_file='annotations/validation-image-metas.pkl',
+         image_level_ann_file='annotations/validation-'
+         'annotations-human-imagelabels-boxable.csv',
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='OpenImagesMetric',
+     iou_thrs=0.5,
+     ioa_thrs=0.5,
+     use_group_of=True,
+     get_supercategory=True)
+ test_evaluator = val_evaluator
configs/_base_/datasets/semi_coco_detection.py ADDED
@@ -0,0 +1,178 @@
+ # dataset settings
+ dataset_type = 'CocoDataset'
+ data_root = 'data/coco/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
+
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection/',
+ #         'data/': 's3://openmmlab/datasets/detection/'
+ #     }))
+ backend_args = None
+
+ color_space = [
+     [dict(type='ColorTransform')],
+     [dict(type='AutoContrast')],
+     [dict(type='Equalize')],
+     [dict(type='Sharpness')],
+     [dict(type='Posterize')],
+     [dict(type='Solarize')],
+     [dict(type='Color')],
+     [dict(type='Contrast')],
+     [dict(type='Brightness')],
+ ]
+
+ geometric = [
+     [dict(type='Rotate')],
+     [dict(type='ShearX')],
+     [dict(type='ShearY')],
+     [dict(type='TranslateX')],
+     [dict(type='TranslateY')],
+ ]
+
+ scale = [(1333, 400), (1333, 1200)]
+
+ branch_field = ['sup', 'unsup_teacher', 'unsup_student']
+ # pipeline used to augment labeled data,
+ # which will be sent to student model for supervised training.
+ sup_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='LoadAnnotations', with_bbox=True),
+     dict(type='RandomResize', scale=scale, keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='RandAugment', aug_space=color_space, aug_num=1),
+     dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
+     dict(
+         type='MultiBranch',
+         branch_field=branch_field,
+         sup=dict(type='PackDetInputs'))
+ ]
+
+ # pipeline used to augment unlabeled data weakly,
+ # which will be sent to teacher model for predicting pseudo instances.
+ weak_pipeline = [
+     dict(type='RandomResize', scale=scale, keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor', 'flip', 'flip_direction',
+                    'homography_matrix')),
+ ]
+
+ # pipeline used to augment unlabeled data strongly,
+ # which will be sent to student model for unsupervised training.
+ strong_pipeline = [
+     dict(type='RandomResize', scale=scale, keep_ratio=True),
+     dict(type='RandomFlip', prob=0.5),
+     dict(
+         type='RandomOrder',
+         transforms=[
+             dict(type='RandAugment', aug_space=color_space, aug_num=1),
+             dict(type='RandAugment', aug_space=geometric, aug_num=1),
+         ]),
+     dict(type='RandomErasing', n_patches=(1, 5), ratio=(0, 0.2)),
+     dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor', 'flip', 'flip_direction',
+                    'homography_matrix')),
+ ]
+
+ # pipeline used to augment unlabeled data into different views
+ unsup_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='LoadEmptyAnnotations'),
+     dict(
+         type='MultiBranch',
+         branch_field=branch_field,
+         unsup_teacher=weak_pipeline,
+         unsup_student=strong_pipeline,
+     )
+ ]
+
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor'))
+ ]
+
+ batch_size = 5
+ num_workers = 5
+ # There are two common semi-supervised learning settings on the coco dataset:
+ # (1) Divide the train2017 into labeled and unlabeled datasets
+ # by a fixed percentage, such as 1%, 2%, 5% and 10%.
+ # The formats of labeled_ann_file and unlabeled_ann_file are
+ # instances_train2017.{fold}@{percent}.json and
+ # instances_train2017.{fold}@{percent}-unlabeled.json.
+ # `fold` is used for cross-validation, and `percent` represents
+ # the proportion of labeled data in the train2017.
+ # (2) Choose the train2017 as the labeled dataset
+ # and unlabeled2017 as the unlabeled dataset.
+ # The labeled_ann_file and unlabeled_ann_file are
+ # instances_train2017.json and image_info_unlabeled2017.json.
+ # We use this configuration by default.
+ labeled_dataset = dict(
+     type=dataset_type,
+     data_root=data_root,
+     ann_file='annotations/instances_train2017.json',
+     data_prefix=dict(img='train2017/'),
+     filter_cfg=dict(filter_empty_gt=True, min_size=32),
+     pipeline=sup_pipeline,
+     backend_args=backend_args)
+
+ unlabeled_dataset = dict(
+     type=dataset_type,
+     data_root=data_root,
+     ann_file='annotations/instances_unlabeled2017.json',
+     data_prefix=dict(img='unlabeled2017/'),
+     filter_cfg=dict(filter_empty_gt=False),
+     pipeline=unsup_pipeline,
+     backend_args=backend_args)
+
+ train_dataloader = dict(
+     batch_size=batch_size,
+     num_workers=num_workers,
+     persistent_workers=True,
+     sampler=dict(
+         type='GroupMultiSourceSampler',
+         batch_size=batch_size,
+         source_ratio=[1, 4]),
+     dataset=dict(
+         type='ConcatDataset', datasets=[labeled_dataset, unlabeled_dataset]))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='annotations/instances_val2017.json',
+         data_prefix=dict(img='val2017/'),
+         test_mode=True,
+         pipeline=test_pipeline,
+         backend_args=backend_args))
+
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='CocoMetric',
+     ann_file=data_root + 'annotations/instances_val2017.json',
+     metric='bbox',
+     format_only=False,
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
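
With `batch_size=5` and `source_ratio=[1, 4]`, each training batch draws one labeled and four unlabeled images. For setting (1) described in the comments above, the split annotation files follow the `instances_train2017.{fold}@{percent}.json` naming; a small helper to build those names (the fold and percent values in the usage line are illustrative):

    def semi_split_ann_files(fold: int, percent: int):
        labeled = f'instances_train2017.{fold}@{percent}.json'
        unlabeled = f'instances_train2017.{fold}@{percent}-unlabeled.json'
        return labeled, unlabeled

    print(semi_split_ann_files(1, 10))
    # ('instances_train2017.1@10.json',
    #  'instances_train2017.1@10-unlabeled.json')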
configs/_base_/datasets/voc0712.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ dataset_type = 'VOCDataset'
3
+ data_root = 'data/VOCdevkit/'
4
+
5
+ # Example to use different file client
6
+ # Method 1: simply set the data root and let the file I/O module
7
+ # automatically Infer from prefix (not support LMDB and Memcache yet)
8
+
9
+ # data_root = 's3://openmmlab/datasets/detection/segmentation/VOCdevkit/'
10
+
11
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
12
+ # backend_args = dict(
13
+ # backend='petrel',
14
+ # path_mapping=dict({
15
+ # './data/': 's3://openmmlab/datasets/segmentation/',
16
+ # 'data/': 's3://openmmlab/datasets/segmentation/'
17
+ # }))
18
+ backend_args = None
19
+
20
+ train_pipeline = [
21
+ dict(type='LoadImageFromFile', backend_args=backend_args),
22
+ dict(type='LoadAnnotations', with_bbox=True),
23
+ dict(type='Resize', scale=(1000, 600), keep_ratio=True),
24
+ dict(type='RandomFlip', prob=0.5),
25
+ dict(type='PackDetInputs')
26
+ ]
27
+ test_pipeline = [
28
+ dict(type='LoadImageFromFile', backend_args=backend_args),
29
+ dict(type='Resize', scale=(1000, 600), keep_ratio=True),
30
+ # avoid bboxes being resized
31
+ dict(type='LoadAnnotations', with_bbox=True),
32
+ dict(
33
+ type='PackDetInputs',
34
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
35
+ 'scale_factor'))
36
+ ]
37
+ train_dataloader = dict(
38
+ batch_size=2,
39
+ num_workers=2,
40
+ persistent_workers=True,
41
+ sampler=dict(type='DefaultSampler', shuffle=True),
42
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
43
+ dataset=dict(
44
+ type='RepeatDataset',
45
+ times=3,
46
+ dataset=dict(
47
+ type='ConcatDataset',
48
+ # VOCDataset adds a different `dataset_type` to dataset.metainfo,
49
+ # which raises an error when used with ConcatDataset. Adding
50
+ # `ignore_keys` avoids this error.
51
+ ignore_keys=['dataset_type'],
52
+ datasets=[
53
+ dict(
54
+ type=dataset_type,
55
+ data_root=data_root,
56
+ ann_file='VOC2007/ImageSets/Main/trainval.txt',
57
+ data_prefix=dict(sub_data_root='VOC2007/'),
58
+ filter_cfg=dict(
59
+ filter_empty_gt=True, min_size=32, bbox_min_size=32),
60
+ pipeline=train_pipeline,
61
+ backend_args=backend_args),
62
+ dict(
63
+ type=dataset_type,
64
+ data_root=data_root,
65
+ ann_file='VOC2012/ImageSets/Main/trainval.txt',
66
+ data_prefix=dict(sub_data_root='VOC2012/'),
67
+ filter_cfg=dict(
68
+ filter_empty_gt=True, min_size=32, bbox_min_size=32),
69
+ pipeline=train_pipeline,
70
+ backend_args=backend_args)
71
+ ])))
72
+
73
+ val_dataloader = dict(
74
+ batch_size=1,
75
+ num_workers=2,
76
+ persistent_workers=True,
77
+ drop_last=False,
78
+ sampler=dict(type='DefaultSampler', shuffle=False),
79
+ dataset=dict(
80
+ type=dataset_type,
81
+ data_root=data_root,
82
+ ann_file='VOC2007/ImageSets/Main/test.txt',
83
+ data_prefix=dict(sub_data_root='VOC2007/'),
84
+ test_mode=True,
85
+ pipeline=test_pipeline,
86
+ backend_args=backend_args))
87
+ test_dataloader = val_dataloader
88
+
89
+ # Pascal VOC2007 uses `11points` as its default evaluation mode, while
90
+ # Pascal VOC2012 defaults to `area`.
91
+ val_evaluator = dict(type='VOCMetric', metric='mAP', eval_mode='11points')
92
+ test_evaluator = val_evaluator
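
For reference, the two evaluation modes named in the comment differ in how AP is reduced from the precision-recall curve. A toy sketch of the VOC2007-style 11-point interpolation (illustrative only; mmdet's `VOCMetric` is the authoritative implementation):

```python
import numpy as np

def ap_11points(recalls, precisions):
    """VOC2007-style AP: mean of max precision at 11 recall thresholds."""
    ap = 0.0
    for t in np.arange(0.0, 1.1, 0.1):
        mask = recalls >= t
        ap += (precisions[mask].max() if mask.any() else 0.0) / 11.0
    return ap

# Hypothetical PR points; 'area' mode would integrate the curve instead.
print(round(ap_11points(np.array([0.1, 0.4, 0.7, 0.9]),
                        np.array([1.0, 0.8, 0.6, 0.5])), 4))
```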
configs/_base_/datasets/wider_face.py ADDED
@@ -0,0 +1,73 @@
1
+ # dataset settings
2
+ dataset_type = 'WIDERFaceDataset'
3
+ data_root = 'data/WIDERFace/'
4
+ # Example to use different file client
5
+ # Method 1: simply set the data root and let the file I/O module
6
+ # automatically infer from prefix (LMDB and Memcache are not supported yet)
7
+
8
+ # data_root = 's3://openmmlab/datasets/detection/cityscapes/'
9
+
10
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
11
+ # backend_args = dict(
12
+ # backend='petrel',
13
+ # path_mapping=dict({
14
+ # './data/': 's3://openmmlab/datasets/detection/',
15
+ # 'data/': 's3://openmmlab/datasets/detection/'
16
+ # }))
17
+ backend_args = None
18
+
19
+ img_scale = (640, 640) # VGA resolution
20
+
21
+ train_pipeline = [
22
+ dict(type='LoadImageFromFile', backend_args=backend_args),
23
+ dict(type='LoadAnnotations', with_bbox=True),
24
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
25
+ dict(type='RandomFlip', prob=0.5),
26
+ dict(type='PackDetInputs')
27
+ ]
28
+ test_pipeline = [
29
+ dict(type='LoadImageFromFile', backend_args=backend_args),
30
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
31
+ dict(type='LoadAnnotations', with_bbox=True),
32
+ dict(
33
+ type='PackDetInputs',
34
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
35
+ 'scale_factor'))
36
+ ]
37
+
38
+ train_dataloader = dict(
39
+ batch_size=2,
40
+ num_workers=2,
41
+ persistent_workers=True,
42
+ drop_last=False,
43
+ sampler=dict(type='DefaultSampler', shuffle=True),
44
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
45
+ dataset=dict(
46
+ type=dataset_type,
47
+ data_root=data_root,
48
+ ann_file='train.txt',
49
+ data_prefix=dict(img='WIDER_train'),
50
+ filter_cfg=dict(filter_empty_gt=True, bbox_min_size=17, min_size=32),
51
+ pipeline=train_pipeline))
52
+
53
+ val_dataloader = dict(
54
+ batch_size=1,
55
+ num_workers=2,
56
+ persistent_workers=True,
57
+ drop_last=False,
58
+ sampler=dict(type='DefaultSampler', shuffle=False),
59
+ dataset=dict(
60
+ type=dataset_type,
61
+ data_root=data_root,
62
+ ann_file='val.txt',
63
+ data_prefix=dict(img='WIDER_val'),
64
+ test_mode=True,
65
+ pipeline=test_pipeline))
66
+ test_dataloader = val_dataloader
67
+
68
+ val_evaluator = dict(
69
+ # TODO: support WiderFace-Evaluation for easy, medium, hard cases
70
+ type='VOCMetric',
71
+ metric='mAP',
72
+ eval_mode='11points')
73
+ test_evaluator = val_evaluator
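
Since both pipelines resize with `keep_ratio=True`, the (640, 640) scale acts as a bounding box for the rescaled image rather than a fixed output shape. A small sketch of that rescaling rule (it mirrors, but is not, mmcv's implementation):

```python
def rescale_size(w, h, scale=(640, 640)):
    """Scale (w, h) to fit inside `scale` while keeping the aspect ratio."""
    long_edge, short_edge = max(scale), min(scale)
    factor = min(long_edge / max(w, h), short_edge / min(w, h))
    return round(w * factor), round(h * factor)

print(rescale_size(1024, 768))  # -> (640, 480), not (640, 640)
```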
configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,25 @@
1
+ default_scope = 'mmdet'
2
+
3
+ default_hooks = dict(
4
+ timer=dict(type='IterTimerHook'),
5
+ logger=dict(type='LoggerHook', interval=50),
6
+ param_scheduler=dict(type='ParamSchedulerHook'),
7
+ # checkpoint=dict(type='CheckpointHook', interval=-1, by_epoch=True, save_best='auto'),
8
+ checkpoint=dict(type='CheckpointHook', interval=999999, by_epoch=True),
9
+ sampler_seed=dict(type='DistSamplerSeedHook'),
10
+ visualization=dict(type='DetVisualizationHook'))
11
+
12
+ env_cfg = dict(
13
+ cudnn_benchmark=False,
14
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
15
+ dist_cfg=dict(backend='nccl'),
16
+ )
17
+
18
+ vis_backends = [dict(type='LocalVisBackend')]
19
+ visualizer = dict(
20
+ type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
21
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
22
+
23
+ log_level = 'INFO'
24
+ load_from = None
25
+ resume = False
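
With `interval=999999`, the checkpoint hook above effectively saves only at the very end of training. A derived config can re-enable periodic saving by overriding just that hook; a hypothetical override (the `_base_` path is illustrative):

```python
# Hypothetical user config overriding the checkpoint behaviour above.
_base_ = ['./configs/_base_/default_runtime.py']

default_hooks = dict(
    checkpoint=dict(
        type='CheckpointHook', interval=1, by_epoch=True, save_best='auto'))
```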
configs/_base_/models/cascade-mask-rcnn_r50_fpn.py ADDED
@@ -0,0 +1,203 @@
1
+ # model settings
2
+ model = dict(
3
+ type='CascadeRCNN',
4
+ data_preprocessor=dict(
5
+ type='DetDataPreprocessor',
6
+ mean=[123.675, 116.28, 103.53],
7
+ std=[58.395, 57.12, 57.375],
8
+ bgr_to_rgb=True,
9
+ pad_mask=True,
10
+ pad_size_divisor=32),
11
+ backbone=dict(
12
+ type='ResNet',
13
+ depth=50,
14
+ num_stages=4,
15
+ out_indices=(0, 1, 2, 3),
16
+ frozen_stages=1,
17
+ norm_cfg=dict(type='BN', requires_grad=True),
18
+ norm_eval=True,
19
+ style='pytorch',
20
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
21
+ neck=dict(
22
+ type='FPN',
23
+ in_channels=[256, 512, 1024, 2048],
24
+ out_channels=256,
25
+ num_outs=5),
26
+ rpn_head=dict(
27
+ type='RPNHead',
28
+ in_channels=256,
29
+ feat_channels=256,
30
+ anchor_generator=dict(
31
+ type='AnchorGenerator',
32
+ scales=[8],
33
+ ratios=[0.5, 1.0, 2.0],
34
+ strides=[4, 8, 16, 32, 64]),
35
+ bbox_coder=dict(
36
+ type='DeltaXYWHBBoxCoder',
37
+ target_means=[.0, .0, .0, .0],
38
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
39
+ loss_cls=dict(
40
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
41
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
42
+ roi_head=dict(
43
+ type='CascadeRoIHead',
44
+ num_stages=3,
45
+ stage_loss_weights=[1, 0.5, 0.25],
46
+ bbox_roi_extractor=dict(
47
+ type='SingleRoIExtractor',
48
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
49
+ out_channels=256,
50
+ featmap_strides=[4, 8, 16, 32]),
51
+ bbox_head=[
52
+ dict(
53
+ type='Shared2FCBBoxHead',
54
+ in_channels=256,
55
+ fc_out_channels=1024,
56
+ roi_feat_size=7,
57
+ num_classes=80,
58
+ bbox_coder=dict(
59
+ type='DeltaXYWHBBoxCoder',
60
+ target_means=[0., 0., 0., 0.],
61
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
62
+ reg_class_agnostic=True,
63
+ loss_cls=dict(
64
+ type='CrossEntropyLoss',
65
+ use_sigmoid=False,
66
+ loss_weight=1.0),
67
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
68
+ loss_weight=1.0)),
69
+ dict(
70
+ type='Shared2FCBBoxHead',
71
+ in_channels=256,
72
+ fc_out_channels=1024,
73
+ roi_feat_size=7,
74
+ num_classes=80,
75
+ bbox_coder=dict(
76
+ type='DeltaXYWHBBoxCoder',
77
+ target_means=[0., 0., 0., 0.],
78
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
79
+ reg_class_agnostic=True,
80
+ loss_cls=dict(
81
+ type='CrossEntropyLoss',
82
+ use_sigmoid=False,
83
+ loss_weight=1.0),
84
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
85
+ loss_weight=1.0)),
86
+ dict(
87
+ type='Shared2FCBBoxHead',
88
+ in_channels=256,
89
+ fc_out_channels=1024,
90
+ roi_feat_size=7,
91
+ num_classes=80,
92
+ bbox_coder=dict(
93
+ type='DeltaXYWHBBoxCoder',
94
+ target_means=[0., 0., 0., 0.],
95
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
96
+ reg_class_agnostic=True,
97
+ loss_cls=dict(
98
+ type='CrossEntropyLoss',
99
+ use_sigmoid=False,
100
+ loss_weight=1.0),
101
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
102
+ ],
103
+ mask_roi_extractor=dict(
104
+ type='SingleRoIExtractor',
105
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
106
+ out_channels=256,
107
+ featmap_strides=[4, 8, 16, 32]),
108
+ mask_head=dict(
109
+ type='FCNMaskHead',
110
+ num_convs=4,
111
+ in_channels=256,
112
+ conv_out_channels=256,
113
+ num_classes=80,
114
+ loss_mask=dict(
115
+ type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
116
+ # model training and testing settings
117
+ train_cfg=dict(
118
+ rpn=dict(
119
+ assigner=dict(
120
+ type='MaxIoUAssigner',
121
+ pos_iou_thr=0.7,
122
+ neg_iou_thr=0.3,
123
+ min_pos_iou=0.3,
124
+ match_low_quality=True,
125
+ ignore_iof_thr=-1),
126
+ sampler=dict(
127
+ type='RandomSampler',
128
+ num=256,
129
+ pos_fraction=0.5,
130
+ neg_pos_ub=-1,
131
+ add_gt_as_proposals=False),
132
+ allowed_border=0,
133
+ pos_weight=-1,
134
+ debug=False),
135
+ rpn_proposal=dict(
136
+ nms_pre=2000,
137
+ max_per_img=2000,
138
+ nms=dict(type='nms', iou_threshold=0.7),
139
+ min_bbox_size=0),
140
+ rcnn=[
141
+ dict(
142
+ assigner=dict(
143
+ type='MaxIoUAssigner',
144
+ pos_iou_thr=0.5,
145
+ neg_iou_thr=0.5,
146
+ min_pos_iou=0.5,
147
+ match_low_quality=False,
148
+ ignore_iof_thr=-1),
149
+ sampler=dict(
150
+ type='RandomSampler',
151
+ num=512,
152
+ pos_fraction=0.25,
153
+ neg_pos_ub=-1,
154
+ add_gt_as_proposals=True),
155
+ mask_size=28,
156
+ pos_weight=-1,
157
+ debug=False),
158
+ dict(
159
+ assigner=dict(
160
+ type='MaxIoUAssigner',
161
+ pos_iou_thr=0.6,
162
+ neg_iou_thr=0.6,
163
+ min_pos_iou=0.6,
164
+ match_low_quality=False,
165
+ ignore_iof_thr=-1),
166
+ sampler=dict(
167
+ type='RandomSampler',
168
+ num=512,
169
+ pos_fraction=0.25,
170
+ neg_pos_ub=-1,
171
+ add_gt_as_proposals=True),
172
+ mask_size=28,
173
+ pos_weight=-1,
174
+ debug=False),
175
+ dict(
176
+ assigner=dict(
177
+ type='MaxIoUAssigner',
178
+ pos_iou_thr=0.7,
179
+ neg_iou_thr=0.7,
180
+ min_pos_iou=0.7,
181
+ match_low_quality=False,
182
+ ignore_iof_thr=-1),
183
+ sampler=dict(
184
+ type='RandomSampler',
185
+ num=512,
186
+ pos_fraction=0.25,
187
+ neg_pos_ub=-1,
188
+ add_gt_as_proposals=True),
189
+ mask_size=28,
190
+ pos_weight=-1,
191
+ debug=False)
192
+ ]),
193
+ test_cfg=dict(
194
+ rpn=dict(
195
+ nms_pre=1000,
196
+ max_per_img=1000,
197
+ nms=dict(type='nms', iou_threshold=0.7),
198
+ min_bbox_size=0),
199
+ rcnn=dict(
200
+ score_thr=0.05,
201
+ nms=dict(type='nms', iou_threshold=0.5),
202
+ max_per_img=100,
203
+ mask_thr_binary=0.5)))
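
The three `rcnn` stages above raise the assigner IoU threshold (0.5 → 0.6 → 0.7) while the bbox heads shrink `target_stds`; later stages refine proposals that already sit close to the ground truth, so their raw regression deltas are smaller. A sketch of the `DeltaXYWHBBoxCoder`-style encoding that makes the std choice visible (illustrative values, not mmdet's code):

```python
import numpy as np

def encode(proposal, gt, stds):
    """Encode a gt box against a proposal as normalized (dx, dy, dw, dh)."""
    px, py, pw, ph = proposal
    gx, gy, gw, gh = gt
    deltas = np.array([(gx - px) / pw, (gy - py) / ph,
                       np.log(gw / pw), np.log(gh / ph)])
    return deltas / np.array(stds)

prop, gt = (50., 50., 100., 100.), (54., 52., 110., 105.)  # cx, cy, w, h
print(encode(prop, gt, [0.1, 0.1, 0.2, 0.2]))          # stage-1 scaling
print(encode(prop, gt, [0.033, 0.033, 0.067, 0.067]))  # stage-3 scaling
```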
configs/_base_/models/cascade-rcnn_r50_fpn.py ADDED
@@ -0,0 +1,185 @@
1
+ # model settings
2
+ model = dict(
3
+ type='CascadeRCNN',
4
+ data_preprocessor=dict(
5
+ type='DetDataPreprocessor',
6
+ mean=[123.675, 116.28, 103.53],
7
+ std=[58.395, 57.12, 57.375],
8
+ bgr_to_rgb=True,
9
+ pad_size_divisor=32),
10
+ backbone=dict(
11
+ type='ResNet',
12
+ depth=50,
13
+ num_stages=4,
14
+ out_indices=(0, 1, 2, 3),
15
+ frozen_stages=1,
16
+ norm_cfg=dict(type='BN', requires_grad=True),
17
+ norm_eval=True,
18
+ style='pytorch',
19
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
20
+ neck=dict(
21
+ type='FPN',
22
+ in_channels=[256, 512, 1024, 2048],
23
+ out_channels=256,
24
+ num_outs=5),
25
+ rpn_head=dict(
26
+ type='RPNHead',
27
+ in_channels=256,
28
+ feat_channels=256,
29
+ anchor_generator=dict(
30
+ type='AnchorGenerator',
31
+ scales=[8],
32
+ ratios=[0.5, 1.0, 2.0],
33
+ strides=[4, 8, 16, 32, 64]),
34
+ bbox_coder=dict(
35
+ type='DeltaXYWHBBoxCoder',
36
+ target_means=[.0, .0, .0, .0],
37
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
38
+ loss_cls=dict(
39
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
40
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
41
+ roi_head=dict(
42
+ type='CascadeRoIHead',
43
+ num_stages=3,
44
+ stage_loss_weights=[1, 0.5, 0.25],
45
+ bbox_roi_extractor=dict(
46
+ type='SingleRoIExtractor',
47
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
48
+ out_channels=256,
49
+ featmap_strides=[4, 8, 16, 32]),
50
+ bbox_head=[
51
+ dict(
52
+ type='Shared2FCBBoxHead',
53
+ in_channels=256,
54
+ fc_out_channels=1024,
55
+ roi_feat_size=7,
56
+ num_classes=80,
57
+ bbox_coder=dict(
58
+ type='DeltaXYWHBBoxCoder',
59
+ target_means=[0., 0., 0., 0.],
60
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
61
+ reg_class_agnostic=True,
62
+ loss_cls=dict(
63
+ type='CrossEntropyLoss',
64
+ use_sigmoid=False,
65
+ loss_weight=1.0),
66
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
67
+ loss_weight=1.0)),
68
+ dict(
69
+ type='Shared2FCBBoxHead',
70
+ in_channels=256,
71
+ fc_out_channels=1024,
72
+ roi_feat_size=7,
73
+ num_classes=80,
74
+ bbox_coder=dict(
75
+ type='DeltaXYWHBBoxCoder',
76
+ target_means=[0., 0., 0., 0.],
77
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
78
+ reg_class_agnostic=True,
79
+ loss_cls=dict(
80
+ type='CrossEntropyLoss',
81
+ use_sigmoid=False,
82
+ loss_weight=1.0),
83
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
84
+ loss_weight=1.0)),
85
+ dict(
86
+ type='Shared2FCBBoxHead',
87
+ in_channels=256,
88
+ fc_out_channels=1024,
89
+ roi_feat_size=7,
90
+ num_classes=80,
91
+ bbox_coder=dict(
92
+ type='DeltaXYWHBBoxCoder',
93
+ target_means=[0., 0., 0., 0.],
94
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
95
+ reg_class_agnostic=True,
96
+ loss_cls=dict(
97
+ type='CrossEntropyLoss',
98
+ use_sigmoid=False,
99
+ loss_weight=1.0),
100
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
101
+ ]),
102
+ # model training and testing settings
103
+ train_cfg=dict(
104
+ rpn=dict(
105
+ assigner=dict(
106
+ type='MaxIoUAssigner',
107
+ pos_iou_thr=0.7,
108
+ neg_iou_thr=0.3,
109
+ min_pos_iou=0.3,
110
+ match_low_quality=True,
111
+ ignore_iof_thr=-1),
112
+ sampler=dict(
113
+ type='RandomSampler',
114
+ num=256,
115
+ pos_fraction=0.5,
116
+ neg_pos_ub=-1,
117
+ add_gt_as_proposals=False),
118
+ allowed_border=0,
119
+ pos_weight=-1,
120
+ debug=False),
121
+ rpn_proposal=dict(
122
+ nms_pre=2000,
123
+ max_per_img=2000,
124
+ nms=dict(type='nms', iou_threshold=0.7),
125
+ min_bbox_size=0),
126
+ rcnn=[
127
+ dict(
128
+ assigner=dict(
129
+ type='MaxIoUAssigner',
130
+ pos_iou_thr=0.5,
131
+ neg_iou_thr=0.5,
132
+ min_pos_iou=0.5,
133
+ match_low_quality=False,
134
+ ignore_iof_thr=-1),
135
+ sampler=dict(
136
+ type='RandomSampler',
137
+ num=512,
138
+ pos_fraction=0.25,
139
+ neg_pos_ub=-1,
140
+ add_gt_as_proposals=True),
141
+ pos_weight=-1,
142
+ debug=False),
143
+ dict(
144
+ assigner=dict(
145
+ type='MaxIoUAssigner',
146
+ pos_iou_thr=0.6,
147
+ neg_iou_thr=0.6,
148
+ min_pos_iou=0.6,
149
+ match_low_quality=False,
150
+ ignore_iof_thr=-1),
151
+ sampler=dict(
152
+ type='RandomSampler',
153
+ num=512,
154
+ pos_fraction=0.25,
155
+ neg_pos_ub=-1,
156
+ add_gt_as_proposals=True),
157
+ pos_weight=-1,
158
+ debug=False),
159
+ dict(
160
+ assigner=dict(
161
+ type='MaxIoUAssigner',
162
+ pos_iou_thr=0.7,
163
+ neg_iou_thr=0.7,
164
+ min_pos_iou=0.7,
165
+ match_low_quality=False,
166
+ ignore_iof_thr=-1),
167
+ sampler=dict(
168
+ type='RandomSampler',
169
+ num=512,
170
+ pos_fraction=0.25,
171
+ neg_pos_ub=-1,
172
+ add_gt_as_proposals=True),
173
+ pos_weight=-1,
174
+ debug=False)
175
+ ]),
176
+ test_cfg=dict(
177
+ rpn=dict(
178
+ nms_pre=1000,
179
+ max_per_img=1000,
180
+ nms=dict(type='nms', iou_threshold=0.7),
181
+ min_bbox_size=0),
182
+ rcnn=dict(
183
+ score_thr=0.05,
184
+ nms=dict(type='nms', iou_threshold=0.5),
185
+ max_per_img=100)))
configs/_base_/models/fast-rcnn_r50_fpn.py ADDED
@@ -0,0 +1,68 @@
1
+ # model settings
2
+ model = dict(
3
+ type='FastRCNN',
4
+ data_preprocessor=dict(
5
+ type='DetDataPreprocessor',
6
+ mean=[123.675, 116.28, 103.53],
7
+ std=[58.395, 57.12, 57.375],
8
+ bgr_to_rgb=True,
9
+ pad_size_divisor=32),
10
+ backbone=dict(
11
+ type='ResNet',
12
+ depth=50,
13
+ num_stages=4,
14
+ out_indices=(0, 1, 2, 3),
15
+ frozen_stages=1,
16
+ norm_cfg=dict(type='BN', requires_grad=True),
17
+ norm_eval=True,
18
+ style='pytorch',
19
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
20
+ neck=dict(
21
+ type='FPN',
22
+ in_channels=[256, 512, 1024, 2048],
23
+ out_channels=256,
24
+ num_outs=5),
25
+ roi_head=dict(
26
+ type='StandardRoIHead',
27
+ bbox_roi_extractor=dict(
28
+ type='SingleRoIExtractor',
29
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
30
+ out_channels=256,
31
+ featmap_strides=[4, 8, 16, 32]),
32
+ bbox_head=dict(
33
+ type='Shared2FCBBoxHead',
34
+ in_channels=256,
35
+ fc_out_channels=1024,
36
+ roi_feat_size=7,
37
+ num_classes=80,
38
+ bbox_coder=dict(
39
+ type='DeltaXYWHBBoxCoder',
40
+ target_means=[0., 0., 0., 0.],
41
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
42
+ reg_class_agnostic=False,
43
+ loss_cls=dict(
44
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
45
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
46
+ # model training and testing settings
47
+ train_cfg=dict(
48
+ rcnn=dict(
49
+ assigner=dict(
50
+ type='MaxIoUAssigner',
51
+ pos_iou_thr=0.5,
52
+ neg_iou_thr=0.5,
53
+ min_pos_iou=0.5,
54
+ match_low_quality=False,
55
+ ignore_iof_thr=-1),
56
+ sampler=dict(
57
+ type='RandomSampler',
58
+ num=512,
59
+ pos_fraction=0.25,
60
+ neg_pos_ub=-1,
61
+ add_gt_as_proposals=True),
62
+ pos_weight=-1,
63
+ debug=False)),
64
+ test_cfg=dict(
65
+ rcnn=dict(
66
+ score_thr=0.05,
67
+ nms=dict(type='nms', iou_threshold=0.5),
68
+ max_per_img=100)))
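
Fast R-CNN has no RPN, so proposals must be supplied by the dataset. A hedged sketch of the additions a derived config would make (the transform and field names mirror mmdet's fast-rcnn configs, and the `.pkl` path is illustrative; verify both against your mmdet version):

```python
# Hypothetical additions for training Fast R-CNN with precomputed proposals.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadProposals', num_max_proposals=2000),
    dict(type='LoadAnnotations', with_bbox=True),
    # Resize/flip must be applied to the proposals as well as the image.
    dict(
        type='ProposalBroadcaster',
        transforms=[
            dict(type='Resize', scale=(1333, 800), keep_ratio=True),
            dict(type='RandomFlip', prob=0.5)
        ]),
    dict(type='PackDetInputs')
]
train_dataloader = dict(
    dataset=dict(
        proposal_file='proposals/rpn_r50_fpn_1x_train2017.pkl',
        pipeline=train_pipeline))
```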
configs/_base_/models/faster-rcnn_r50-caffe-c4.py ADDED
@@ -0,0 +1,123 @@
1
+ # model settings
2
+ norm_cfg = dict(type='BN', requires_grad=False)
3
+ model = dict(
4
+ type='FasterRCNN',
5
+ data_preprocessor=dict(
6
+ type='DetDataPreprocessor',
7
+ mean=[103.530, 116.280, 123.675],
8
+ std=[1.0, 1.0, 1.0],
9
+ bgr_to_rgb=False,
10
+ pad_size_divisor=32),
11
+ backbone=dict(
12
+ type='ResNet',
13
+ depth=50,
14
+ num_stages=3,
15
+ strides=(1, 2, 2),
16
+ dilations=(1, 1, 1),
17
+ out_indices=(2, ),
18
+ frozen_stages=1,
19
+ norm_cfg=norm_cfg,
20
+ norm_eval=True,
21
+ style='caffe',
22
+ init_cfg=dict(
23
+ type='Pretrained',
24
+ checkpoint='open-mmlab://detectron2/resnet50_caffe')),
25
+ rpn_head=dict(
26
+ type='RPNHead',
27
+ in_channels=1024,
28
+ feat_channels=1024,
29
+ anchor_generator=dict(
30
+ type='AnchorGenerator',
31
+ scales=[2, 4, 8, 16, 32],
32
+ ratios=[0.5, 1.0, 2.0],
33
+ strides=[16]),
34
+ bbox_coder=dict(
35
+ type='DeltaXYWHBBoxCoder',
36
+ target_means=[.0, .0, .0, .0],
37
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
38
+ loss_cls=dict(
39
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
40
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
41
+ roi_head=dict(
42
+ type='StandardRoIHead',
43
+ shared_head=dict(
44
+ type='ResLayer',
45
+ depth=50,
46
+ stage=3,
47
+ stride=2,
48
+ dilation=1,
49
+ style='caffe',
50
+ norm_cfg=norm_cfg,
51
+ norm_eval=True,
52
+ init_cfg=dict(
53
+ type='Pretrained',
54
+ checkpoint='open-mmlab://detectron2/resnet50_caffe')),
55
+ bbox_roi_extractor=dict(
56
+ type='SingleRoIExtractor',
57
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
58
+ out_channels=1024,
59
+ featmap_strides=[16]),
60
+ bbox_head=dict(
61
+ type='BBoxHead',
62
+ with_avg_pool=True,
63
+ roi_feat_size=7,
64
+ in_channels=2048,
65
+ num_classes=80,
66
+ bbox_coder=dict(
67
+ type='DeltaXYWHBBoxCoder',
68
+ target_means=[0., 0., 0., 0.],
69
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
70
+ reg_class_agnostic=False,
71
+ loss_cls=dict(
72
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
73
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
74
+ # model training and testing settings
75
+ train_cfg=dict(
76
+ rpn=dict(
77
+ assigner=dict(
78
+ type='MaxIoUAssigner',
79
+ pos_iou_thr=0.7,
80
+ neg_iou_thr=0.3,
81
+ min_pos_iou=0.3,
82
+ match_low_quality=True,
83
+ ignore_iof_thr=-1),
84
+ sampler=dict(
85
+ type='RandomSampler',
86
+ num=256,
87
+ pos_fraction=0.5,
88
+ neg_pos_ub=-1,
89
+ add_gt_as_proposals=False),
90
+ allowed_border=-1,
91
+ pos_weight=-1,
92
+ debug=False),
93
+ rpn_proposal=dict(
94
+ nms_pre=12000,
95
+ max_per_img=2000,
96
+ nms=dict(type='nms', iou_threshold=0.7),
97
+ min_bbox_size=0),
98
+ rcnn=dict(
99
+ assigner=dict(
100
+ type='MaxIoUAssigner',
101
+ pos_iou_thr=0.5,
102
+ neg_iou_thr=0.5,
103
+ min_pos_iou=0.5,
104
+ match_low_quality=False,
105
+ ignore_iof_thr=-1),
106
+ sampler=dict(
107
+ type='RandomSampler',
108
+ num=512,
109
+ pos_fraction=0.25,
110
+ neg_pos_ub=-1,
111
+ add_gt_as_proposals=True),
112
+ pos_weight=-1,
113
+ debug=False)),
114
+ test_cfg=dict(
115
+ rpn=dict(
116
+ nms_pre=6000,
117
+ max_per_img=1000,
118
+ nms=dict(type='nms', iou_threshold=0.7),
119
+ min_bbox_size=0),
120
+ rcnn=dict(
121
+ score_thr=0.05,
122
+ nms=dict(type='nms', iou_threshold=0.5),
123
+ max_per_img=100)))
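
Note the caffe-style preprocessing in this family of configs: images stay in BGR order (`bgr_to_rgb=False`) and are only mean-subtracted, since `std` is all ones. A quick numeric check of that difference versus the pytorch-style configs:

```python
import numpy as np

bgr_pixel = np.array([120.0, 110.0, 130.0])   # one pixel, BGR order
mean = np.array([103.530, 116.280, 123.675])  # BGR means from the config
std = np.array([1.0, 1.0, 1.0])
print((bgr_pixel - mean) / std)  # mean-centred only; no std scaling
```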
configs/_base_/models/faster-rcnn_r50-caffe-dc5.py ADDED
@@ -0,0 +1,111 @@
1
+ # model settings
2
+ norm_cfg = dict(type='BN', requires_grad=False)
3
+ model = dict(
4
+ type='FasterRCNN',
5
+ data_preprocessor=dict(
6
+ type='DetDataPreprocessor',
7
+ mean=[103.530, 116.280, 123.675],
8
+ std=[1.0, 1.0, 1.0],
9
+ bgr_to_rgb=False,
10
+ pad_size_divisor=32),
11
+ backbone=dict(
12
+ type='ResNet',
13
+ depth=50,
14
+ num_stages=4,
15
+ strides=(1, 2, 2, 1),
16
+ dilations=(1, 1, 1, 2),
17
+ out_indices=(3, ),
18
+ frozen_stages=1,
19
+ norm_cfg=norm_cfg,
20
+ norm_eval=True,
21
+ style='caffe',
22
+ init_cfg=dict(
23
+ type='Pretrained',
24
+ checkpoint='open-mmlab://detectron2/resnet50_caffe')),
25
+ rpn_head=dict(
26
+ type='RPNHead',
27
+ in_channels=2048,
28
+ feat_channels=2048,
29
+ anchor_generator=dict(
30
+ type='AnchorGenerator',
31
+ scales=[2, 4, 8, 16, 32],
32
+ ratios=[0.5, 1.0, 2.0],
33
+ strides=[16]),
34
+ bbox_coder=dict(
35
+ type='DeltaXYWHBBoxCoder',
36
+ target_means=[.0, .0, .0, .0],
37
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
38
+ loss_cls=dict(
39
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
40
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
41
+ roi_head=dict(
42
+ type='StandardRoIHead',
43
+ bbox_roi_extractor=dict(
44
+ type='SingleRoIExtractor',
45
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
46
+ out_channels=2048,
47
+ featmap_strides=[16]),
48
+ bbox_head=dict(
49
+ type='Shared2FCBBoxHead',
50
+ in_channels=2048,
51
+ fc_out_channels=1024,
52
+ roi_feat_size=7,
53
+ num_classes=80,
54
+ bbox_coder=dict(
55
+ type='DeltaXYWHBBoxCoder',
56
+ target_means=[0., 0., 0., 0.],
57
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
58
+ reg_class_agnostic=False,
59
+ loss_cls=dict(
60
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
61
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
62
+ # model training and testing settings
63
+ train_cfg=dict(
64
+ rpn=dict(
65
+ assigner=dict(
66
+ type='MaxIoUAssigner',
67
+ pos_iou_thr=0.7,
68
+ neg_iou_thr=0.3,
69
+ min_pos_iou=0.3,
70
+ match_low_quality=True,
71
+ ignore_iof_thr=-1),
72
+ sampler=dict(
73
+ type='RandomSampler',
74
+ num=256,
75
+ pos_fraction=0.5,
76
+ neg_pos_ub=-1,
77
+ add_gt_as_proposals=False),
78
+ allowed_border=0,
79
+ pos_weight=-1,
80
+ debug=False),
81
+ rpn_proposal=dict(
82
+ nms_pre=12000,
83
+ max_per_img=2000,
84
+ nms=dict(type='nms', iou_threshold=0.7),
85
+ min_bbox_size=0),
86
+ rcnn=dict(
87
+ assigner=dict(
88
+ type='MaxIoUAssigner',
89
+ pos_iou_thr=0.5,
90
+ neg_iou_thr=0.5,
91
+ min_pos_iou=0.5,
92
+ match_low_quality=False,
93
+ ignore_iof_thr=-1),
94
+ sampler=dict(
95
+ type='RandomSampler',
96
+ num=512,
97
+ pos_fraction=0.25,
98
+ neg_pos_ub=-1,
99
+ add_gt_as_proposals=True),
100
+ pos_weight=-1,
101
+ debug=False)),
102
+ test_cfg=dict(
103
+ rpn=dict(
104
+ nms=dict(type='nms', iou_threshold=0.7),
105
+ nms_pre=6000,
106
+ max_per_img=1000,
107
+ min_bbox_size=0),
108
+ rcnn=dict(
109
+ score_thr=0.05,
110
+ nms=dict(type='nms', iou_threshold=0.5),
111
+ max_per_img=100)))
configs/_base_/models/faster-rcnn_r50_fpn.py ADDED
@@ -0,0 +1,114 @@
1
+ # model settings
2
+ model = dict(
3
+ type='FasterRCNN',
4
+ data_preprocessor=dict(
5
+ type='DetDataPreprocessor',
6
+ mean=[123.675, 116.28, 103.53],
7
+ std=[58.395, 57.12, 57.375],
8
+ bgr_to_rgb=True,
9
+ pad_size_divisor=32),
10
+ backbone=dict(
11
+ type='ResNet',
12
+ depth=50,
13
+ num_stages=4,
14
+ out_indices=(0, 1, 2, 3),
15
+ frozen_stages=1,
16
+ norm_cfg=dict(type='BN', requires_grad=True),
17
+ norm_eval=True,
18
+ style='pytorch',
19
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
20
+ neck=dict(
21
+ type='FPN',
22
+ in_channels=[256, 512, 1024, 2048],
23
+ out_channels=256,
24
+ num_outs=5),
25
+ rpn_head=dict(
26
+ type='RPNHead',
27
+ in_channels=256,
28
+ feat_channels=256,
29
+ anchor_generator=dict(
30
+ type='AnchorGenerator',
31
+ scales=[8],
32
+ ratios=[0.5, 1.0, 2.0],
33
+ strides=[4, 8, 16, 32, 64]),
34
+ bbox_coder=dict(
35
+ type='DeltaXYWHBBoxCoder',
36
+ target_means=[.0, .0, .0, .0],
37
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
38
+ loss_cls=dict(
39
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
40
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
41
+ roi_head=dict(
42
+ type='StandardRoIHead',
43
+ bbox_roi_extractor=dict(
44
+ type='SingleRoIExtractor',
45
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
46
+ out_channels=256,
47
+ featmap_strides=[4, 8, 16, 32]),
48
+ bbox_head=dict(
49
+ type='Shared2FCBBoxHead',
50
+ in_channels=256,
51
+ fc_out_channels=1024,
52
+ roi_feat_size=7,
53
+ num_classes=80,
54
+ bbox_coder=dict(
55
+ type='DeltaXYWHBBoxCoder',
56
+ target_means=[0., 0., 0., 0.],
57
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
58
+ reg_class_agnostic=False,
59
+ loss_cls=dict(
60
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
61
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
62
+ # model training and testing settings
63
+ train_cfg=dict(
64
+ rpn=dict(
65
+ assigner=dict(
66
+ type='MaxIoUAssigner',
67
+ pos_iou_thr=0.7,
68
+ neg_iou_thr=0.3,
69
+ min_pos_iou=0.3,
70
+ match_low_quality=True,
71
+ ignore_iof_thr=-1),
72
+ sampler=dict(
73
+ type='RandomSampler',
74
+ num=256,
75
+ pos_fraction=0.5,
76
+ neg_pos_ub=-1,
77
+ add_gt_as_proposals=False),
78
+ allowed_border=-1,
79
+ pos_weight=-1,
80
+ debug=False),
81
+ rpn_proposal=dict(
82
+ nms_pre=2000,
83
+ max_per_img=1000,
84
+ nms=dict(type='nms', iou_threshold=0.7),
85
+ min_bbox_size=0),
86
+ rcnn=dict(
87
+ assigner=dict(
88
+ type='MaxIoUAssigner',
89
+ pos_iou_thr=0.5,
90
+ neg_iou_thr=0.5,
91
+ min_pos_iou=0.5,
92
+ match_low_quality=False,
93
+ ignore_iof_thr=-1),
94
+ sampler=dict(
95
+ type='RandomSampler',
96
+ num=512,
97
+ pos_fraction=0.25,
98
+ neg_pos_ub=-1,
99
+ add_gt_as_proposals=True),
100
+ pos_weight=-1,
101
+ debug=False)),
102
+ test_cfg=dict(
103
+ rpn=dict(
104
+ nms_pre=1000,
105
+ max_per_img=1000,
106
+ nms=dict(type='nms', iou_threshold=0.7),
107
+ min_bbox_size=0),
108
+ rcnn=dict(
109
+ score_thr=0.05,
110
+ nms=dict(type='nms', iou_threshold=0.5),
111
+ max_per_img=100)
112
+ # soft-nms is also supported for rcnn testing
113
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
114
+ ))
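
On the soft-NMS alternative mentioned in the closing comment: instead of discarding boxes that overlap a kept box, soft-NMS decays their scores. A toy sketch of the linear decay variant (mmcv's `soft_nms` also offers a gaussian method; this is an illustration, not its implementation):

```python
import numpy as np

def soft_nms_decay(scores, ious, iou_threshold=0.5):
    """Linear soft-NMS step: decay scores of boxes overlapping the top box."""
    decayed = scores.copy()
    overlap = ious > iou_threshold
    decayed[overlap] *= (1.0 - ious[overlap])
    return decayed

scores = np.array([0.9, 0.8, 0.3])
ious = np.array([0.0, 0.7, 0.2])     # IoU of each box with the kept box
print(soft_nms_decay(scores, ious))  # -> [0.9, 0.24, 0.3]
```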
configs/_base_/models/mask-rcnn_r50-caffe-c4.py ADDED
@@ -0,0 +1,132 @@
1
+ # model settings
2
+ norm_cfg = dict(type='BN', requires_grad=False)
3
+ model = dict(
4
+ type='MaskRCNN',
5
+ data_preprocessor=dict(
6
+ type='DetDataPreprocessor',
7
+ mean=[103.530, 116.280, 123.675],
8
+ std=[1.0, 1.0, 1.0],
9
+ bgr_to_rgb=False,
10
+ pad_mask=True,
11
+ pad_size_divisor=32),
12
+ backbone=dict(
13
+ type='ResNet',
14
+ depth=50,
15
+ num_stages=3,
16
+ strides=(1, 2, 2),
17
+ dilations=(1, 1, 1),
18
+ out_indices=(2, ),
19
+ frozen_stages=1,
20
+ norm_cfg=norm_cfg,
21
+ norm_eval=True,
22
+ style='caffe',
23
+ init_cfg=dict(
24
+ type='Pretrained',
25
+ checkpoint='open-mmlab://detectron2/resnet50_caffe')),
26
+ rpn_head=dict(
27
+ type='RPNHead',
28
+ in_channels=1024,
29
+ feat_channels=1024,
30
+ anchor_generator=dict(
31
+ type='AnchorGenerator',
32
+ scales=[2, 4, 8, 16, 32],
33
+ ratios=[0.5, 1.0, 2.0],
34
+ strides=[16]),
35
+ bbox_coder=dict(
36
+ type='DeltaXYWHBBoxCoder',
37
+ target_means=[.0, .0, .0, .0],
38
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
39
+ loss_cls=dict(
40
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
41
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
42
+ roi_head=dict(
43
+ type='StandardRoIHead',
44
+ shared_head=dict(
45
+ type='ResLayer',
46
+ depth=50,
47
+ stage=3,
48
+ stride=2,
49
+ dilation=1,
50
+ style='caffe',
51
+ norm_cfg=norm_cfg,
52
+ norm_eval=True),
53
+ bbox_roi_extractor=dict(
54
+ type='SingleRoIExtractor',
55
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
56
+ out_channels=1024,
57
+ featmap_strides=[16]),
58
+ bbox_head=dict(
59
+ type='BBoxHead',
60
+ with_avg_pool=True,
61
+ roi_feat_size=7,
62
+ in_channels=2048,
63
+ num_classes=80,
64
+ bbox_coder=dict(
65
+ type='DeltaXYWHBBoxCoder',
66
+ target_means=[0., 0., 0., 0.],
67
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
68
+ reg_class_agnostic=False,
69
+ loss_cls=dict(
70
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
71
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
72
+ mask_roi_extractor=None,
73
+ mask_head=dict(
74
+ type='FCNMaskHead',
75
+ num_convs=0,
76
+ in_channels=2048,
77
+ conv_out_channels=256,
78
+ num_classes=80,
79
+ loss_mask=dict(
80
+ type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
81
+ # model training and testing settings
82
+ train_cfg=dict(
83
+ rpn=dict(
84
+ assigner=dict(
85
+ type='MaxIoUAssigner',
86
+ pos_iou_thr=0.7,
87
+ neg_iou_thr=0.3,
88
+ min_pos_iou=0.3,
89
+ match_low_quality=True,
90
+ ignore_iof_thr=-1),
91
+ sampler=dict(
92
+ type='RandomSampler',
93
+ num=256,
94
+ pos_fraction=0.5,
95
+ neg_pos_ub=-1,
96
+ add_gt_as_proposals=False),
97
+ allowed_border=0,
98
+ pos_weight=-1,
99
+ debug=False),
100
+ rpn_proposal=dict(
101
+ nms_pre=12000,
102
+ max_per_img=2000,
103
+ nms=dict(type='nms', iou_threshold=0.7),
104
+ min_bbox_size=0),
105
+ rcnn=dict(
106
+ assigner=dict(
107
+ type='MaxIoUAssigner',
108
+ pos_iou_thr=0.5,
109
+ neg_iou_thr=0.5,
110
+ min_pos_iou=0.5,
111
+ match_low_quality=False,
112
+ ignore_iof_thr=-1),
113
+ sampler=dict(
114
+ type='RandomSampler',
115
+ num=512,
116
+ pos_fraction=0.25,
117
+ neg_pos_ub=-1,
118
+ add_gt_as_proposals=True),
119
+ mask_size=14,
120
+ pos_weight=-1,
121
+ debug=False)),
122
+ test_cfg=dict(
123
+ rpn=dict(
124
+ nms_pre=6000,
125
+ nms=dict(type='nms', iou_threshold=0.7),
126
+ max_per_img=1000,
127
+ min_bbox_size=0),
128
+ rcnn=dict(
129
+ score_thr=0.05,
130
+ nms=dict(type='nms', iou_threshold=0.5),
131
+ max_per_img=100,
132
+ mask_thr_binary=0.5)))
configs/_base_/models/mask-rcnn_r50_fpn.py ADDED
@@ -0,0 +1,127 @@
1
+ # model settings
2
+ model = dict(
3
+ type='MaskRCNN',
4
+ data_preprocessor=dict(
5
+ type='DetDataPreprocessor',
6
+ mean=[123.675, 116.28, 103.53],
7
+ std=[58.395, 57.12, 57.375],
8
+ bgr_to_rgb=True,
9
+ pad_mask=True,
10
+ pad_size_divisor=32),
11
+ backbone=dict(
12
+ type='ResNet',
13
+ depth=50,
14
+ num_stages=4,
15
+ out_indices=(0, 1, 2, 3),
16
+ frozen_stages=1,
17
+ norm_cfg=dict(type='BN', requires_grad=True),
18
+ norm_eval=True,
19
+ style='pytorch',
20
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
21
+ neck=dict(
22
+ type='FPN',
23
+ in_channels=[256, 512, 1024, 2048],
24
+ out_channels=256,
25
+ num_outs=5),
26
+ rpn_head=dict(
27
+ type='RPNHead',
28
+ in_channels=256,
29
+ feat_channels=256,
30
+ anchor_generator=dict(
31
+ type='AnchorGenerator',
32
+ scales=[8],
33
+ ratios=[0.5, 1.0, 2.0],
34
+ strides=[4, 8, 16, 32, 64]),
35
+ bbox_coder=dict(
36
+ type='DeltaXYWHBBoxCoder',
37
+ target_means=[.0, .0, .0, .0],
38
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
39
+ loss_cls=dict(
40
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
41
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
42
+ roi_head=dict(
43
+ type='StandardRoIHead',
44
+ bbox_roi_extractor=dict(
45
+ type='SingleRoIExtractor',
46
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
47
+ out_channels=256,
48
+ featmap_strides=[4, 8, 16, 32]),
49
+ bbox_head=dict(
50
+ type='Shared2FCBBoxHead',
51
+ in_channels=256,
52
+ fc_out_channels=1024,
53
+ roi_feat_size=7,
54
+ num_classes=80,
55
+ bbox_coder=dict(
56
+ type='DeltaXYWHBBoxCoder',
57
+ target_means=[0., 0., 0., 0.],
58
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
59
+ reg_class_agnostic=False,
60
+ loss_cls=dict(
61
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
62
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
63
+ mask_roi_extractor=dict(
64
+ type='SingleRoIExtractor',
65
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
66
+ out_channels=256,
67
+ featmap_strides=[4, 8, 16, 32]),
68
+ mask_head=dict(
69
+ type='FCNMaskHead',
70
+ num_convs=4,
71
+ in_channels=256,
72
+ conv_out_channels=256,
73
+ num_classes=80,
74
+ loss_mask=dict(
75
+ type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
76
+ # model training and testing settings
77
+ train_cfg=dict(
78
+ rpn=dict(
79
+ assigner=dict(
80
+ type='MaxIoUAssigner',
81
+ pos_iou_thr=0.7,
82
+ neg_iou_thr=0.3,
83
+ min_pos_iou=0.3,
84
+ match_low_quality=True,
85
+ ignore_iof_thr=-1),
86
+ sampler=dict(
87
+ type='RandomSampler',
88
+ num=256,
89
+ pos_fraction=0.5,
90
+ neg_pos_ub=-1,
91
+ add_gt_as_proposals=False),
92
+ allowed_border=-1,
93
+ pos_weight=-1,
94
+ debug=False),
95
+ rpn_proposal=dict(
96
+ nms_pre=2000,
97
+ max_per_img=1000,
98
+ nms=dict(type='nms', iou_threshold=0.7),
99
+ min_bbox_size=0),
100
+ rcnn=dict(
101
+ assigner=dict(
102
+ type='MaxIoUAssigner',
103
+ pos_iou_thr=0.5,
104
+ neg_iou_thr=0.5,
105
+ min_pos_iou=0.5,
106
+ match_low_quality=True,
107
+ ignore_iof_thr=-1),
108
+ sampler=dict(
109
+ type='RandomSampler',
110
+ num=512,
111
+ pos_fraction=0.25,
112
+ neg_pos_ub=-1,
113
+ add_gt_as_proposals=True),
114
+ mask_size=28,
115
+ pos_weight=-1,
116
+ debug=False)),
117
+ test_cfg=dict(
118
+ rpn=dict(
119
+ nms_pre=1000,
120
+ max_per_img=1000,
121
+ nms=dict(type='nms', iou_threshold=0.7),
122
+ min_bbox_size=0),
123
+ rcnn=dict(
124
+ score_thr=0.05,
125
+ nms=dict(type='nms', iou_threshold=0.5),
126
+ max_per_img=100,
127
+ mask_thr_binary=0.5)))
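
The RoI extractors above pool from four FPN levels (`featmap_strides` 4/8/16/32), choosing a level per RoI by scale. A sketch of the FPN-paper heuristic, k = k0 + log2(sqrt(wh)/224); mmdet's `SingleRoIExtractor` uses an equivalent rule with its own constants:

```python
import math

def roi_fpn_level(w, h, k0=4, k_min=2, k_max=5):
    """Map an RoI of size (w, h) to an FPN level P2..P5."""
    k = math.floor(k0 + math.log2(math.sqrt(w * h) / 224))
    return min(max(k, k_min), k_max)

print(roi_fpn_level(224, 224))  # -> 4: P4, stride 16
print(roi_fpn_level(64, 64))    # -> 2: P2, stride 4
```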
configs/_base_/models/retinanet_r50_fpn.py ADDED
@@ -0,0 +1,68 @@
1
+ # model settings
2
+ model = dict(
3
+ type='RetinaNet',
4
+ data_preprocessor=dict(
5
+ type='DetDataPreprocessor',
6
+ mean=[123.675, 116.28, 103.53],
7
+ std=[58.395, 57.12, 57.375],
8
+ bgr_to_rgb=True,
9
+ pad_size_divisor=32),
10
+ backbone=dict(
11
+ type='ResNet',
12
+ depth=50,
13
+ num_stages=4,
14
+ out_indices=(0, 1, 2, 3),
15
+ frozen_stages=1,
16
+ norm_cfg=dict(type='BN', requires_grad=True),
17
+ norm_eval=True,
18
+ style='pytorch',
19
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
20
+ neck=dict(
21
+ type='FPN',
22
+ in_channels=[256, 512, 1024, 2048],
23
+ out_channels=256,
24
+ start_level=1,
25
+ add_extra_convs='on_input',
26
+ num_outs=5),
27
+ bbox_head=dict(
28
+ type='RetinaHead',
29
+ num_classes=80,
30
+ in_channels=256,
31
+ stacked_convs=4,
32
+ feat_channels=256,
33
+ anchor_generator=dict(
34
+ type='AnchorGenerator',
35
+ octave_base_scale=4,
36
+ scales_per_octave=3,
37
+ ratios=[0.5, 1.0, 2.0],
38
+ strides=[8, 16, 32, 64, 128]),
39
+ bbox_coder=dict(
40
+ type='DeltaXYWHBBoxCoder',
41
+ target_means=[.0, .0, .0, .0],
42
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
43
+ loss_cls=dict(
44
+ type='FocalLoss',
45
+ use_sigmoid=True,
46
+ gamma=2.0,
47
+ alpha=0.25,
48
+ loss_weight=1.0),
49
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
50
+ # model training and testing settings
51
+ train_cfg=dict(
52
+ assigner=dict(
53
+ type='MaxIoUAssigner',
54
+ pos_iou_thr=0.5,
55
+ neg_iou_thr=0.4,
56
+ min_pos_iou=0,
57
+ ignore_iof_thr=-1),
58
+ sampler=dict(
59
+ type='PseudoSampler'), # Focal loss should use PseudoSampler
60
+ allowed_border=-1,
61
+ pos_weight=-1,
62
+ debug=False),
63
+ test_cfg=dict(
64
+ nms_pre=1000,
65
+ min_bbox_size=0,
66
+ score_thr=0.05,
67
+ nms=dict(type='nms', iou_threshold=0.5),
68
+ max_per_img=100))
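
RetinaNet pairs `FocalLoss` with a `PseudoSampler` because focal loss itself down-weights the flood of easy negatives, so no hard sampling is needed. A minimal numeric sketch using the gamma/alpha values from this config:

```python
import numpy as np

def focal_loss(p, target, gamma=2.0, alpha=0.25):
    """Binary focal loss for one prediction p (probability of class 1)."""
    p_t = p if target == 1 else 1 - p
    alpha_t = alpha if target == 1 else 1 - alpha
    return -alpha_t * (1 - p_t) ** gamma * np.log(p_t)

print(focal_loss(0.9, 1))  # easy positive: near-zero loss
print(focal_loss(0.1, 1))  # hard positive: orders of magnitude larger
```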
configs/_base_/models/rpn_r50-caffe-c4.py ADDED
@@ -0,0 +1,64 @@
1
+ # model settings
2
+ model = dict(
3
+ type='RPN',
4
+ data_preprocessor=dict(
5
+ type='DetDataPreprocessor',
6
+ mean=[103.530, 116.280, 123.675],
7
+ std=[1.0, 1.0, 1.0],
8
+ bgr_to_rgb=False,
9
+ pad_size_divisor=32),
10
+ backbone=dict(
11
+ type='ResNet',
12
+ depth=50,
13
+ num_stages=3,
14
+ strides=(1, 2, 2),
15
+ dilations=(1, 1, 1),
16
+ out_indices=(2, ),
17
+ frozen_stages=1,
18
+ norm_cfg=dict(type='BN', requires_grad=False),
19
+ norm_eval=True,
20
+ style='caffe',
21
+ init_cfg=dict(
22
+ type='Pretrained',
23
+ checkpoint='open-mmlab://detectron2/resnet50_caffe')),
24
+ neck=None,
25
+ rpn_head=dict(
26
+ type='RPNHead',
27
+ in_channels=1024,
28
+ feat_channels=1024,
29
+ anchor_generator=dict(
30
+ type='AnchorGenerator',
31
+ scales=[2, 4, 8, 16, 32],
32
+ ratios=[0.5, 1.0, 2.0],
33
+ strides=[16]),
34
+ bbox_coder=dict(
35
+ type='DeltaXYWHBBoxCoder',
36
+ target_means=[.0, .0, .0, .0],
37
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
38
+ loss_cls=dict(
39
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
40
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
41
+ # model training and testing settings
42
+ train_cfg=dict(
43
+ rpn=dict(
44
+ assigner=dict(
45
+ type='MaxIoUAssigner',
46
+ pos_iou_thr=0.7,
47
+ neg_iou_thr=0.3,
48
+ min_pos_iou=0.3,
49
+ ignore_iof_thr=-1),
50
+ sampler=dict(
51
+ type='RandomSampler',
52
+ num=256,
53
+ pos_fraction=0.5,
54
+ neg_pos_ub=-1,
55
+ add_gt_as_proposals=False),
56
+ allowed_border=-1,
57
+ pos_weight=-1,
58
+ debug=False)),
59
+ test_cfg=dict(
60
+ rpn=dict(
61
+ nms_pre=12000,
62
+ max_per_img=2000,
63
+ nms=dict(type='nms', iou_threshold=0.7),
64
+ min_bbox_size=0)))
configs/_base_/models/rpn_r50_fpn.py ADDED
@@ -0,0 +1,64 @@
1
+ # model settings
2
+ model = dict(
3
+ type='RPN',
4
+ data_preprocessor=dict(
5
+ type='DetDataPreprocessor',
6
+ mean=[123.675, 116.28, 103.53],
7
+ std=[58.395, 57.12, 57.375],
8
+ bgr_to_rgb=True,
9
+ pad_size_divisor=32),
10
+ backbone=dict(
11
+ type='ResNet',
12
+ depth=50,
13
+ num_stages=4,
14
+ out_indices=(0, 1, 2, 3),
15
+ frozen_stages=1,
16
+ norm_cfg=dict(type='BN', requires_grad=True),
17
+ norm_eval=True,
18
+ style='pytorch',
19
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
20
+ neck=dict(
21
+ type='FPN',
22
+ in_channels=[256, 512, 1024, 2048],
23
+ out_channels=256,
24
+ num_outs=5),
25
+ rpn_head=dict(
26
+ type='RPNHead',
27
+ in_channels=256,
28
+ feat_channels=256,
29
+ anchor_generator=dict(
30
+ type='AnchorGenerator',
31
+ scales=[8],
32
+ ratios=[0.5, 1.0, 2.0],
33
+ strides=[4, 8, 16, 32, 64]),
34
+ bbox_coder=dict(
35
+ type='DeltaXYWHBBoxCoder',
36
+ target_means=[.0, .0, .0, .0],
37
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
38
+ loss_cls=dict(
39
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
40
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
41
+ # model training and testing settings
42
+ train_cfg=dict(
43
+ rpn=dict(
44
+ assigner=dict(
45
+ type='MaxIoUAssigner',
46
+ pos_iou_thr=0.7,
47
+ neg_iou_thr=0.3,
48
+ min_pos_iou=0.3,
49
+ ignore_iof_thr=-1),
50
+ sampler=dict(
51
+ type='RandomSampler',
52
+ num=256,
53
+ pos_fraction=0.5,
54
+ neg_pos_ub=-1,
55
+ add_gt_as_proposals=False),
56
+ allowed_border=-1,
57
+ pos_weight=-1,
58
+ debug=False)),
59
+ test_cfg=dict(
60
+ rpn=dict(
61
+ nms_pre=2000,
62
+ max_per_img=1000,
63
+ nms=dict(type='nms', iou_threshold=0.7),
64
+ min_bbox_size=0)))
configs/_base_/models/ssd300.py ADDED
@@ -0,0 +1,63 @@
1
+ # model settings
2
+ input_size = 300
3
+ model = dict(
4
+ type='SingleStageDetector',
5
+ data_preprocessor=dict(
6
+ type='DetDataPreprocessor',
7
+ mean=[123.675, 116.28, 103.53],
8
+ std=[1, 1, 1],
9
+ bgr_to_rgb=True,
10
+ pad_size_divisor=1),
11
+ backbone=dict(
12
+ type='SSDVGG',
13
+ depth=16,
14
+ with_last_pool=False,
15
+ ceil_mode=True,
16
+ out_indices=(3, 4),
17
+ out_feature_indices=(22, 34),
18
+ init_cfg=dict(
19
+ type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')),
20
+ neck=dict(
21
+ type='SSDNeck',
22
+ in_channels=(512, 1024),
23
+ out_channels=(512, 1024, 512, 256, 256, 256),
24
+ level_strides=(2, 2, 1, 1),
25
+ level_paddings=(1, 1, 0, 0),
26
+ l2_norm_scale=20),
27
+ bbox_head=dict(
28
+ type='SSDHead',
29
+ in_channels=(512, 1024, 512, 256, 256, 256),
30
+ num_classes=80,
31
+ anchor_generator=dict(
32
+ type='SSDAnchorGenerator',
33
+ scale_major=False,
34
+ input_size=input_size,
35
+ basesize_ratio_range=(0.15, 0.9),
36
+ strides=[8, 16, 32, 64, 100, 300],
37
+ ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),
38
+ bbox_coder=dict(
39
+ type='DeltaXYWHBBoxCoder',
40
+ target_means=[.0, .0, .0, .0],
41
+ target_stds=[0.1, 0.1, 0.2, 0.2])),
42
+ # model training and testing settings
43
+ train_cfg=dict(
44
+ assigner=dict(
45
+ type='MaxIoUAssigner',
46
+ pos_iou_thr=0.5,
47
+ neg_iou_thr=0.5,
48
+ min_pos_iou=0.,
49
+ ignore_iof_thr=-1,
50
+ gt_max_assign_all=False),
51
+ sampler=dict(type='PseudoSampler'),
52
+ smoothl1_beta=1.,
53
+ allowed_border=-1,
54
+ pos_weight=-1,
55
+ neg_pos_ratio=3,
56
+ debug=False),
57
+ test_cfg=dict(
58
+ nms_pre=1000,
59
+ nms=dict(type='nms', iou_threshold=0.45),
60
+ min_bbox_size=0,
61
+ score_thr=0.02,
62
+ max_per_img=200))
63
+ cudnn_benchmark = True
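
For orientation, the per-level anchor base sizes implied by `basesize_ratio_range=(0.15, 0.9)` follow the classic SSD linear rule over the six feature levels; mmdet's `SSDAnchorGenerator` applies extra special-casing for the first level, so treat this as a sketch:

```python
input_size, (s_min, s_max), num_levels = 300, (0.15, 0.9), 6
ratios = [s_min + (s_max - s_min) * k / (num_levels - 1)
          for k in range(num_levels)]
print([round(input_size * r) for r in ratios])  # -> [45, 90, 135, 180, 225, 270]
```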
configs/_base_/schedules/schedule_1x.py ADDED
@@ -0,0 +1,28 @@
1
+ # training schedule for 1x
2
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
3
+ val_cfg = dict(type='ValLoop')
4
+ test_cfg = dict(type='TestLoop')
5
+
6
+ # learning rate
7
+ param_scheduler = [
8
+ dict(
9
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
10
+ dict(
11
+ type='MultiStepLR',
12
+ begin=0,
13
+ end=12,
14
+ by_epoch=True,
15
+ milestones=[8, 11],
16
+ gamma=0.1)
17
+ ]
18
+
19
+ # optimizer
20
+ optim_wrapper = dict(
21
+ type='OptimWrapper',
22
+ optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
23
+
24
+ # Default setting for scaling LR automatically
25
+ # - `enable`: whether to scale the LR automatically (off by default).
26
+ # - `base_batch_size`: the total batch size the preset lr assumes;
27
+ #   this config sets it to 4 (upstream uses 8 GPUs x 2 samples = 16).
28
+ auto_scale_lr = dict(enable=False, base_batch_size=4)
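
When `auto_scale_lr` is enabled, mmengine scales the optimizer LR linearly by the ratio of the actual total batch size to `base_batch_size`. A one-line sketch of that rule with this schedule's values (the 8 below is a hypothetical deployment batch):

```python
base_lr, base_batch_size = 0.02, 4  # values from this schedule
actual_batch = 8                    # e.g. 4 GPUs x 2 samples per GPU
print(base_lr * actual_batch / base_batch_size)  # -> 0.04
```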
configs/_base_/schedules/schedule_20e.py ADDED
@@ -0,0 +1,28 @@
1
+ # training schedule for 20e
2
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=20, val_interval=1)
3
+ val_cfg = dict(type='ValLoop')
4
+ test_cfg = dict(type='TestLoop')
5
+
6
+ # learning rate
7
+ param_scheduler = [
8
+ dict(
9
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
10
+ dict(
11
+ type='MultiStepLR',
12
+ begin=0,
13
+ end=20,
14
+ by_epoch=True,
15
+ milestones=[16, 19],
16
+ gamma=0.1)
17
+ ]
18
+
19
+ # optimizer
20
+ optim_wrapper = dict(
21
+ type='OptimWrapper',
22
+ optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
23
+
24
+ # Default setting for scaling LR automatically
25
+ # - `enable` means enable scaling LR automatically
26
+ # or not by default.
27
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
28
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
configs/_base_/schedules/schedule_2x.py ADDED
@@ -0,0 +1,28 @@
1
+ # training schedule for 2x
2
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=1)
3
+ val_cfg = dict(type='ValLoop')
4
+ test_cfg = dict(type='TestLoop')
5
+
6
+ # learning rate
7
+ param_scheduler = [
8
+ dict(
9
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
10
+ dict(
11
+ type='MultiStepLR',
12
+ begin=0,
13
+ end=24,
14
+ by_epoch=True,
15
+ milestones=[16, 22],
16
+ gamma=0.1)
17
+ ]
18
+
19
+ # optimizer
20
+ optim_wrapper = dict(
21
+ type='OptimWrapper',
22
+ optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
23
+
24
+ # Default setting for scaling LR automatically
25
+ # - `enable`: whether to scale the LR automatically
26
+ #   (off by default).
27
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
28
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
configs/backup/albu_example/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # Albu Example
2
+
3
+ > [Albumentations: fast and flexible image augmentations](https://arxiv.org/abs/1809.06839)
4
+
5
+ <!-- [OTHERS] -->
6
+
7
+ ## Abstract
8
+
9
+ Data augmentation is a commonly used technique for increasing both the size and the diversity of labeled training sets by leveraging input transformations that preserve output labels. In computer vision domain, image augmentations have become a common implicit regularization technique to combat overfitting in deep convolutional neural networks and are ubiquitously used to improve performance. While most deep learning frameworks implement basic image transformations, the list is typically limited to some variations and combinations of flipping, rotating, scaling, and cropping. Moreover, the image processing speed varies in existing tools for image augmentation. We present Albumentations, a fast and flexible library for image augmentations with many various image transform operations available, that is also an easy-to-use wrapper around other augmentation libraries. We provide examples of image augmentations for different computer vision tasks and show that Albumentations is faster than other commonly used image augmentation tools on the most of commonly used image transformations.
10
+
11
+ <div align=center>
12
+ <img src="https://user-images.githubusercontent.com/40661020/143870703-74f3ea3f-ae23-4035-9856-746bc3f88464.png" height="400" />
13
+ </div>
14
+
15
+ ## Results and Models
16
+
17
+ | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download |
18
+ | :------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :-------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
19
+ | R-50 | pytorch | 1x | 4.4 | 16.6 | 38.0 | 34.5 | [config](mask-rcnn_r50_fpn_albu-1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208-ab203bcd.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208_225520.log.json) |
20
+
21
+ ## Citation
22
+
23
+ ```latex
24
+ @article{2018arXiv180906839B,
25
+ author = {A. Buslaev, A. Parinov, E. Khvedchenya, V.~I. Iglovikov and A.~A. Kalinin},
26
+ title = "{Albumentations: fast and flexible image augmentations}",
27
+ journal = {ArXiv e-prints},
28
+ eprint = {1809.06839},
29
+ year = 2018
30
+ }
31
+ ```
configs/backup/albu_example/mask-rcnn_r50_fpn_albu-1x_coco.py ADDED
@@ -0,0 +1,66 @@
1
+ _base_ = '../mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py'
2
+
3
+ albu_train_transforms = [
4
+ dict(
5
+ type='ShiftScaleRotate',
6
+ shift_limit=0.0625,
7
+ scale_limit=0.0,
8
+ rotate_limit=0,
9
+ interpolation=1,
10
+ p=0.5),
11
+ dict(
12
+ type='RandomBrightnessContrast',
13
+ brightness_limit=[0.1, 0.3],
14
+ contrast_limit=[0.1, 0.3],
15
+ p=0.2),
16
+ dict(
17
+ type='OneOf',
18
+ transforms=[
19
+ dict(
20
+ type='RGBShift',
21
+ r_shift_limit=10,
22
+ g_shift_limit=10,
23
+ b_shift_limit=10,
24
+ p=1.0),
25
+ dict(
26
+ type='HueSaturationValue',
27
+ hue_shift_limit=20,
28
+ sat_shift_limit=30,
29
+ val_shift_limit=20,
30
+ p=1.0)
31
+ ],
32
+ p=0.1),
33
+ dict(type='JpegCompression', quality_lower=85, quality_upper=95, p=0.2),
34
+ dict(type='ChannelShuffle', p=0.1),
35
+ dict(
36
+ type='OneOf',
37
+ transforms=[
38
+ dict(type='Blur', blur_limit=3, p=1.0),
39
+ dict(type='MedianBlur', blur_limit=3, p=1.0)
40
+ ],
41
+ p=0.1),
42
+ ]
43
+ train_pipeline = [
44
+ dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
45
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
46
+ dict(type='Resize', scale=(1333, 800), keep_ratio=True),
47
+ dict(
48
+ type='Albu',
49
+ transforms=albu_train_transforms,
50
+ bbox_params=dict(
51
+ type='BboxParams',
52
+ format='pascal_voc',
53
+ label_fields=['gt_bboxes_labels', 'gt_ignore_flags'],
54
+ min_visibility=0.0,
55
+ filter_lost_elements=True),
56
+ keymap={
57
+ 'img': 'image',
58
+ 'gt_masks': 'masks',
59
+ 'gt_bboxes': 'bboxes'
60
+ },
61
+ skip_img_without_anno=True),
62
+ dict(type='RandomFlip', prob=0.5),
63
+ dict(type='PackDetInputs')
64
+ ]
65
+
66
+ train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
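
The `keymap` in the `Albu` transform only renames result-dict keys between mmdet's and Albumentations' conventions before and after the wrapped transforms run. A toy illustration of that mapping step (not the transform's actual code):

```python
keymap = {'img': 'image', 'gt_masks': 'masks', 'gt_bboxes': 'bboxes'}
results = {'img': '<image array>', 'gt_masks': '<masks>',
           'gt_bboxes': '<boxes>', 'img_id': 42}
albu_input = {keymap.get(k, k): v for k, v in results.items()}
print(albu_input)  # keys now match what Albumentations expects
```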
configs/backup/albu_example/metafile.yml ADDED
@@ -0,0 +1,17 @@
1
+ Models:
2
+ - Name: mask-rcnn_r50_fpn_albu-1x_coco
3
+ In Collection: Mask R-CNN
4
+ Config: mask-rcnn_r50_fpn_albu-1x_coco.py
5
+ Metadata:
6
+ Training Memory (GB): 4.4
7
+ Epochs: 12
8
+ Results:
9
+ - Task: Object Detection
10
+ Dataset: COCO
11
+ Metrics:
12
+ box AP: 38.0
13
+ - Task: Instance Segmentation
14
+ Dataset: COCO
15
+ Metrics:
16
+ mask AP: 34.5
17
+ Weights: https://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208-ab203bcd.pth
configs/backup/atss/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # ATSS
2
+
3
+ > [Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection](https://arxiv.org/abs/1912.02424)
4
+
5
+ <!-- [ALGORITHM] -->
6
+
7
+ ## Abstract
8
+
9
+ Object detection has been dominated by anchor-based detectors for several years. Recently, anchor-free detectors have become popular due to the proposal of FPN and Focal Loss. In this paper, we first point out that the essential difference between anchor-based and anchor-free detection is actually how to define positive and negative training samples, which leads to the performance gap between them. If they adopt the same definition of positive and negative samples during training, there is no obvious difference in the final performance, no matter regressing from a box or a point. This shows that how to select positive and negative training samples is important for current object detectors. Then, we propose an Adaptive Training Sample Selection (ATSS) to automatically select positive and negative samples according to statistical characteristics of object. It significantly improves the performance of anchor-based and anchor-free detectors and bridges the gap between them. Finally, we discuss the necessity of tiling multiple anchors per location on the image to detect objects. Extensive experiments conducted on MS COCO support our aforementioned analysis and conclusions. With the newly introduced ATSS, we improve state-of-the-art detectors by a large margin to 50.7% AP without introducing any overhead.
10
+
11
+ <div align=center>
12
+ <img src="https://user-images.githubusercontent.com/40661020/143870776-c81168f5-e8b2-44ee-978b-509e4372c5c9.png"/>
13
+ </div>
14
+
15
+ ## Results and Models
16
+
17
+ | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download |
18
+ | :------: | :-----: | :-----: | :------: | :------------: | :----: | :----------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
19
+ | R-50 | pytorch | 1x | 3.7 | 19.7 | 39.4 | [config](atss_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209_102539.log.json) |
20
+ | R-101 | pytorch | 1x | 5.6 | 12.3 | 41.5 | [config](atss_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.log.json) |
21
+
22
+ ## Citation
23
+
24
+ ```latex
25
+ @article{zhang2019bridging,
26
+ title = {Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection},
27
+ author = {Zhang, Shifeng and Chi, Cheng and Yao, Yongqiang and Lei, Zhen and Li, Stan Z.},
28
+ journal = {arXiv preprint arXiv:1912.02424},
29
+ year = {2019}
30
+ }
31
+ ```
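
The adaptive threshold described in the abstract is simple to state: for each ground-truth box, ATSS takes the top-k anchors by center distance per level and marks an anchor positive when its IoU exceeds the mean plus standard deviation of the candidates' IoUs. A toy sketch of that thresholding step (illustrative numbers; see `ATSSAssigner` in the configs below for the real implementation):

```python
import numpy as np

candidate_ious = np.array([0.12, 0.35, 0.48, 0.20, 0.55])  # top-k candidates
thr = candidate_ious.mean() + candidate_ious.std()
print(round(thr, 3), candidate_ious >= thr)  # only IoUs above mean+std pass
```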
configs/backup/atss/atss_r101_fpn_1x_coco.py ADDED
@@ -0,0 +1,6 @@
1
+ _base_ = './atss_r50_fpn_1x_coco.py'
2
+ model = dict(
3
+ backbone=dict(
4
+ depth=101,
5
+ init_cfg=dict(type='Pretrained',
6
+ checkpoint='torchvision://resnet101')))
configs/backup/atss/atss_r101_fpn_8xb8-amp-lsj-200e_coco.py ADDED
@@ -0,0 +1,7 @@
+ _base_ = './atss_r50_fpn_8xb8-amp-lsj-200e_coco.py'
+
+ model = dict(
+     backbone=dict(
+         depth=101,
+         init_cfg=dict(type='Pretrained',
+                       checkpoint='torchvision://resnet101')))
configs/backup/atss/atss_r18_fpn_8xb8-amp-lsj-200e_coco.py ADDED
@@ -0,0 +1,7 @@
+ _base_ = './atss_r50_fpn_8xb8-amp-lsj-200e_coco.py'
+
+ model = dict(
+     backbone=dict(
+         depth=18,
+         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')),
+     neck=dict(in_channels=[64, 128, 256, 512]))
configs/backup/atss/atss_r50_fpn_1x_coco.py ADDED
@@ -0,0 +1,71 @@
+ _base_ = [
+     '../_base_/datasets/coco_detection.py',
+     '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+ ]
+
+ # model settings
+ model = dict(
+     type='ATSS',
+     data_preprocessor=dict(
+         type='DetDataPreprocessor',
+         mean=[123.675, 116.28, 103.53],
+         std=[58.395, 57.12, 57.375],
+         bgr_to_rgb=True,
+         pad_size_divisor=32),
+     backbone=dict(
+         type='ResNet',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         frozen_stages=1,
+         norm_cfg=dict(type='BN', requires_grad=True),
+         norm_eval=True,
+         style='pytorch',
+         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+     neck=dict(
+         type='FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         start_level=1,
+         add_extra_convs='on_output',
+         num_outs=5),
+     bbox_head=dict(
+         type='ATSSHead',
+         num_classes=80,
+         in_channels=256,
+         stacked_convs=4,
+         feat_channels=256,
+         anchor_generator=dict(
+             type='AnchorGenerator',
+             ratios=[1.0],
+             octave_base_scale=8,
+             scales_per_octave=1,
+             strides=[8, 16, 32, 64, 128]),
+         bbox_coder=dict(
+             type='DeltaXYWHBBoxCoder',
+             target_means=[.0, .0, .0, .0],
+             target_stds=[0.1, 0.1, 0.2, 0.2]),
+         loss_cls=dict(
+             type='FocalLoss',
+             use_sigmoid=True,
+             gamma=2.0,
+             alpha=0.25,
+             loss_weight=1.0),
+         loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
+         loss_centerness=dict(
+             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+     # training and testing settings
+     train_cfg=dict(
+         assigner=dict(type='ATSSAssigner', topk=9),
+         allowed_border=-1,
+         pos_weight=-1,
+         debug=False),
+     test_cfg=dict(
+         nms_pre=1000,
+         min_bbox_size=0,
+         score_thr=0.05,
+         nms=dict(type='nms', iou_threshold=0.6),
+         max_per_img=100))
+ # optimizer
+ optim_wrapper = dict(
+     optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
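As a hedged usage sketch (not part of the commit): in an MMDetection 3.x environment these files are plain mmengine configs, normally passed to `tools/train.py`, so a quick sanity check might look like the following, assuming the `_base_` paths resolve from the config's location.

```python
# Hedged sketch: load and inspect the ATSS config with mmengine.
# Assumes an MMDetection 3.x environment and that the relative
# '_base_' imports above resolve from configs/backup/atss/.
from mmengine.config import Config

cfg = Config.fromfile('configs/backup/atss/atss_r50_fpn_1x_coco.py')
print(cfg.model.type)                   # 'ATSS'
print(cfg.model.bbox_head.num_classes)  # 80
print(cfg.optim_wrapper.optimizer.lr)   # 0.01
```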
configs/backup/atss/atss_r50_fpn_8xb8-amp-lsj-200e_coco.py ADDED
@@ -0,0 +1,81 @@
+ _base_ = '../common/lsj-200e_coco-detection.py'
+
+ image_size = (1024, 1024)
+ batch_augments = [dict(type='BatchFixedSizePad', size=image_size)]
+
+ model = dict(
+     type='ATSS',
+     data_preprocessor=dict(
+         type='DetDataPreprocessor',
+         mean=[123.675, 116.28, 103.53],
+         std=[58.395, 57.12, 57.375],
+         bgr_to_rgb=True,
+         pad_size_divisor=32,
+         batch_augments=batch_augments),
+     backbone=dict(
+         type='ResNet',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         frozen_stages=1,
+         norm_cfg=dict(type='BN', requires_grad=True),
+         norm_eval=True,
+         style='pytorch',
+         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+     neck=dict(
+         type='FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         start_level=1,
+         add_extra_convs='on_output',
+         num_outs=5),
+     bbox_head=dict(
+         type='ATSSHead',
+         num_classes=80,
+         in_channels=256,
+         stacked_convs=4,
+         feat_channels=256,
+         anchor_generator=dict(
+             type='AnchorGenerator',
+             ratios=[1.0],
+             octave_base_scale=8,
+             scales_per_octave=1,
+             strides=[8, 16, 32, 64, 128]),
+         bbox_coder=dict(
+             type='DeltaXYWHBBoxCoder',
+             target_means=[.0, .0, .0, .0],
+             target_stds=[0.1, 0.1, 0.2, 0.2]),
+         loss_cls=dict(
+             type='FocalLoss',
+             use_sigmoid=True,
+             gamma=2.0,
+             alpha=0.25,
+             loss_weight=1.0),
+         loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
+         loss_centerness=dict(
+             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+     # training and testing settings
+     train_cfg=dict(
+         assigner=dict(type='ATSSAssigner', topk=9),
+         allowed_border=-1,
+         pos_weight=-1,
+         debug=False),
+     test_cfg=dict(
+         nms_pre=1000,
+         min_bbox_size=0,
+         score_thr=0.05,
+         nms=dict(type='nms', iou_threshold=0.6),
+         max_per_img=100))
+
+ train_dataloader = dict(batch_size=8, num_workers=4)
+
+ # Enable automatic mixed-precision training with AmpOptimWrapper.
+ optim_wrapper = dict(
+     type='AmpOptimWrapper',
+     optimizer=dict(
+         type='SGD', lr=0.01 * 4, momentum=0.9, weight_decay=0.00004))
+
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
+ # USER SHOULD NOT CHANGE ITS VALUES.
+ # base_batch_size = (8 GPUs) x (8 samples per GPU)
+ auto_scale_lr = dict(base_batch_size=64)
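The closing `auto_scale_lr` block encodes the linear LR scaling rule: the learning rate above is taken as correct for a total batch of 64, and the runner may rescale it in proportion to the actual total batch size. A one-line illustration (the 4-GPU setup is hypothetical):

```python
# Linear LR scaling rule implied by auto_scale_lr (illustration only).
base_lr = 0.01 * 4       # the lr configured above for 8 GPUs x 8 imgs
base_batch_size = 64     # 8 GPUs x 8 samples per GPU
actual_batch_size = 32   # e.g. 4 GPUs x 8 samples per GPU (hypothetical)
scaled_lr = base_lr * actual_batch_size / base_batch_size
print(scaled_lr)         # 0.02
```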
configs/backup/atss/metafile.yml ADDED
@@ -0,0 +1,60 @@
+ Collections:
+   - Name: ATSS
+     Metadata:
+       Training Data: COCO
+       Training Techniques:
+         - SGD with Momentum
+         - Weight Decay
+       Training Resources: 8x V100 GPUs
+       Architecture:
+         - ATSS
+         - FPN
+         - ResNet
+     Paper:
+       URL: https://arxiv.org/abs/1912.02424
+       Title: 'Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection'
+     README: configs/atss/README.md
+     Code:
+       URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/atss.py#L6
+       Version: v2.0.0
+
+ Models:
+   - Name: atss_r50_fpn_1x_coco
+     In Collection: ATSS
+     Config: configs/atss/atss_r50_fpn_1x_coco.py
+     Metadata:
+       Training Memory (GB): 3.7
+       inference time (ms/im):
+         - value: 50.76
+           hardware: V100
+           backend: PyTorch
+           batch size: 1
+           mode: FP32
+           resolution: (800, 1333)
+       Epochs: 12
+     Results:
+       - Task: Object Detection
+         Dataset: COCO
+         Metrics:
+           box AP: 39.4
+     Weights: https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth
+
+   - Name: atss_r101_fpn_1x_coco
+     In Collection: ATSS
+     Config: configs/atss/atss_r101_fpn_1x_coco.py
+     Metadata:
+       Training Memory (GB): 5.6
+       inference time (ms/im):
+         - value: 81.3
+           hardware: V100
+           backend: PyTorch
+           batch size: 1
+           mode: FP32
+           resolution: (800, 1333)
+       Epochs: 12
+     Results:
+       - Task: Object Detection
+         Dataset: COCO
+         Metrics:
+           box AP: 41.5
+     Weights: https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.pth
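The metafile records latency in ms/im while the README table reports fps; the two are reciprocals, which is easy to cross-check:

```python
# Cross-check metafile latency against the README's fps column.
for ms_per_im in (50.76, 81.3):
    print(round(1000.0 / ms_per_im, 1))  # -> 19.7 (R-50), 12.3 (R-101)
```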
configs/backup/autoassign/README.md ADDED
@@ -0,0 +1,35 @@
+ # AutoAssign
+
+ > [AutoAssign: Differentiable Label Assignment for Dense Object Detection](https://arxiv.org/abs/2007.03496)
+
+ <!-- [ALGORITHM] -->
+
+ ## Abstract
+
+ Determining positive/negative samples for object detection is known as label assignment. Here we present an anchor-free detector named AutoAssign. It requires little human knowledge and achieves appearance-aware label assignment through a fully differentiable weighting mechanism. During training, to both satisfy the prior distribution of data and adapt to category characteristics, we present Center Weighting to adjust the category-specific prior distributions. To adapt to object appearances, Confidence Weighting is proposed to adjust the specific assign strategy of each instance. The two weighting modules are then combined to generate positive and negative weights to adjust each location's confidence. Extensive experiments on MS COCO show that our method steadily surpasses other best sampling strategies by large margins with various backbones. Moreover, our best model achieves 52.1% AP, outperforming all existing one-stage detectors. Besides, experiments on other datasets, e.g., PASCAL VOC, Objects365, and WiderFace, demonstrate the broad applicability of AutoAssign.
+
+ <div align=center>
+ <img src="https://user-images.githubusercontent.com/40661020/143870875-33567e44-0584-4470-9a90-0df0fb6c1fe2.png"/>
+ </div>
+
+ ## Results and Models
+
+ | Backbone | Style | Lr schd | Mem (GB) | box AP | Config | Download |
+ | :------: | :---: | :-----: | :------: | :----: | :----: | :------: |
+ | R-50 | caffe | 1x | 4.08 | 40.4 | [config](autoassign_r50-caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.log.json) |
+
+ **Note**:
+
+ 1. We find that the performance is unstable with the 1x setting and may fluctuate by about 0.3 mAP; results in the 40.3 ~ 40.6 mAP range are acceptable. Such fluctuation can also be found in the original implementation.
+ 2. You can get more stable results (~ 40.6 mAP) with a 13-epoch schedule in which the learning rate is divided by 10 at the 10th and 13th epochs.
+
+ ## Citation
+
+ ```latex
+ @article{zhu2020autoassign,
+   title={AutoAssign: Differentiable Label Assignment for Dense Object Detection},
+   author={Zhu, Benjin and Wang, Jianfeng and Jiang, Zhengkai and Zong, Fuhang and Liu, Songtao and Li, Zeming and Sun, Jian},
+   journal={arXiv preprint arXiv:2007.03496},
+   year={2020}
+ }
+ ```
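As a rough sketch of the Center Weighting prior described in the AutoAssign abstract above (a Gaussian-like, category-specific weight over the offset from a feature location to the ground-truth center; `mu` and `sigma` are learned per class in the paper but fixed here, and the function name is hypothetical, not the repository's `AutoAssignHead`):

```python
# Hedged sketch of AutoAssign's Center Weighting prior: weight a location
# by a Gaussian over its stride-normalized offset from the gt box center.
import math

def center_weight(loc, gt_center, stride, mu=(0.0, 0.0), sigma=(1.0, 1.0)):
    dx = (loc[0] - gt_center[0]) / stride - mu[0]
    dy = (loc[1] - gt_center[1]) / stride - mu[1]
    return math.exp(-dx * dx / (2 * sigma[0] ** 2)
                    - dy * dy / (2 * sigma[1] ** 2))

print(center_weight((100, 100), (104, 96), stride=8))  # near center -> ~0.78
```

Because `mu` and `sigma` receive gradients in the real model, each category can shift and widen its own prior, which is the "adapt to category characteristics" part of the abstract; Confidence Weighting then modulates this per instance.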
configs/backup/autoassign/autoassign_r50-caffe_fpn_1x_coco.py ADDED
@@ -0,0 +1,69 @@
+ # We follow the original implementation which
+ # adopts the Caffe pre-trained backbone.
+ _base_ = [
+     '../_base_/datasets/coco_detection.py',
+     '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+ ]
+ # model settings
+ model = dict(
+     type='AutoAssign',
+     data_preprocessor=dict(
+         type='DetDataPreprocessor',
+         mean=[102.9801, 115.9465, 122.7717],
+         std=[1.0, 1.0, 1.0],
+         bgr_to_rgb=False,
+         pad_size_divisor=32),
+     backbone=dict(
+         type='ResNet',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         frozen_stages=1,
+         norm_cfg=dict(type='BN', requires_grad=False),
+         norm_eval=True,
+         style='caffe',
+         init_cfg=dict(
+             type='Pretrained',
+             checkpoint='open-mmlab://detectron2/resnet50_caffe')),
+     neck=dict(
+         type='FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         start_level=1,
+         add_extra_convs=True,
+         num_outs=5,
+         relu_before_extra_convs=True,
+         init_cfg=dict(type='Caffe2Xavier', layer='Conv2d')),
+     bbox_head=dict(
+         type='AutoAssignHead',
+         num_classes=80,
+         in_channels=256,
+         stacked_convs=4,
+         feat_channels=256,
+         strides=[8, 16, 32, 64, 128],
+         loss_bbox=dict(type='GIoULoss', loss_weight=5.0)),
+     train_cfg=None,
+     test_cfg=dict(
+         nms_pre=1000,
+         min_bbox_size=0,
+         score_thr=0.05,
+         nms=dict(type='nms', iou_threshold=0.6),
+         max_per_img=100))
+
+ # learning rate
+ param_scheduler = [
+     dict(
+         type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+         end=1000),
+     dict(
+         type='MultiStepLR',
+         begin=0,
+         end=12,
+         by_epoch=True,
+         milestones=[8, 11],
+         gamma=0.1)
+ ]
+
+ # optimizer
+ optim_wrapper = dict(
+     optimizer=dict(lr=0.01), paramwise_cfg=dict(norm_decay_mult=0.))
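The `param_scheduler` above combines a 1000-iteration linear warmup with step decay at epochs 8 and 11. A small sketch of the resulting learning-rate curve (illustration only; mmengine composes the two schedulers internally, and `lr_at` is a hypothetical helper):

```python
# Illustration of the LR produced by LinearLR warmup + MultiStepLR above.
def lr_at(iteration, epoch, base_lr=0.01, warmup_iters=1000,
          start_factor=0.001, milestones=(8, 11), gamma=0.1):
    factor = 1.0
    if iteration < warmup_iters:  # linear warmup, counted by iteration
        alpha = iteration / warmup_iters
        factor *= start_factor * (1 - alpha) + alpha
    # step decay, counted by epoch
    factor *= gamma ** sum(epoch >= m for m in milestones)
    return base_lr * factor

print(lr_at(0, 0))       # 1e-05 at the first iteration
print(lr_at(5000, 4))    # 0.01 after warmup, before any decay
print(lr_at(90000, 11))  # 0.0001 after both decay steps
```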