|
|
|
"""Get image shape on CrowdHuman dataset. |
|
|
|
Here is an example to run this script. |
|
|
|
Example: |
|
python tools/misc/get_crowdhuman_id_hw.py ${CONFIG} \ |
|
--dataset ${DATASET_TYPE} |
|
""" |
|
import argparse |
|
import json |
|
import logging |
|
import os.path as osp |
|
from multiprocessing import Pool |
|
|
|
import mmcv |
|
from mmengine.config import Config |
|
from mmengine.fileio import dump, get, get_text |
|
from mmengine.logging import print_log |
|
|
|
|
|
def parse_args(): |
|
parser = argparse.ArgumentParser(description='Collect image metas') |
|
parser.add_argument('config', help='Config file path') |
|
parser.add_argument( |
|
'--dataset', |
|
choices=['train', 'val'], |
|
help='Collect image metas from which dataset') |
|
parser.add_argument( |
|
'--nproc', |
|
default=10, |
|
type=int, |
|
help='Processes used for get image metas') |
|
args = parser.parse_args() |
|
return args |
|
|
|
|
|
def get_image_metas(anno_str, img_prefix): |
|
id_hw = {} |
|
anno_dict = json.loads(anno_str) |
|
img_path = osp.join(img_prefix, f"{anno_dict['ID']}.jpg") |
|
img_id = anno_dict['ID'] |
|
img_bytes = get(img_path) |
|
img = mmcv.imfrombytes(img_bytes, backend='cv2') |
|
id_hw[img_id] = img.shape[:2] |
|
return id_hw |
|
|
|
|
|
def main(): |
|
args = parse_args() |
|
|
|
|
|
cfg = Config.fromfile(args.config) |
|
dataset = args.dataset |
|
dataloader_cfg = cfg.get(f'{dataset}_dataloader') |
|
ann_file = osp.join(dataloader_cfg.dataset.data_root, |
|
dataloader_cfg.dataset.ann_file) |
|
img_prefix = osp.join(dataloader_cfg.dataset.data_root, |
|
dataloader_cfg.dataset.data_prefix['img']) |
|
|
|
|
|
print_log( |
|
f'loading CrowdHuman {dataset} annotation...', level=logging.INFO) |
|
anno_strs = get_text(ann_file).strip().split('\n') |
|
pool = Pool(args.nproc) |
|
|
|
id_hw_temp = pool.starmap( |
|
get_image_metas, |
|
zip(anno_strs, [img_prefix for _ in range(len(anno_strs))]), |
|
) |
|
pool.close() |
|
|
|
|
|
id_hw = {} |
|
for sub_dict in id_hw_temp: |
|
id_hw.update(sub_dict) |
|
|
|
data_root = osp.dirname(ann_file) |
|
save_path = osp.join(data_root, f'id_hw_{dataset}.json') |
|
print_log( |
|
f'\nsaving "id_hw_{dataset}.json" in "{data_root}"', |
|
level=logging.INFO) |
|
dump(id_hw, save_path, file_format='json') |
|
|
|
|
|
if __name__ == '__main__': |
|
main() |
|
|