| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110 |
- # Copyright (c) OpenMMLab. All rights reserved.
- import argparse
- import os
- import os.path as osp
- import tempfile
- import zipfile
- import mmcv
- import numpy as np
- from mmengine.utils import mkdir_or_exist
- def parse_args():
- parser = argparse.ArgumentParser(
- description='Convert REFUGE dataset to mmsegmentation format')
- parser.add_argument('--raw_data_root', help='the root path of raw data')
- parser.add_argument('--tmp_dir', help='path of the temporary directory')
- parser.add_argument('-o', '--out_dir', help='output path')
- args = parser.parse_args()
- return args
- def extract_img(root: str,
- cur_dir: str,
- out_dir: str,
- mode: str = 'train',
- file_type: str = 'img') -> None:
- """_summary_
- Args:
- Args:
- root (str): root where the extracted data is saved
- cur_dir (cur_dir): dir where the zip_file exists
- out_dir (str): root dir where the data is saved
- mode (str, optional): Defaults to 'train'.
- file_type (str, optional): Defaults to 'img',else to 'mask'.
- """
- zip_file = zipfile.ZipFile(cur_dir)
- zip_file.extractall(root)
- for cur_dir, dirs, files in os.walk(root):
- # filter child dirs and directories with "Illustration" and "MACOSX"
- if len(dirs) == 0 and \
- cur_dir.split('\\')[-1].find('Illustration') == -1 and \
- cur_dir.find('MACOSX') == -1:
- file_names = [
- file for file in files
- if file.endswith('.jpg') or file.endswith('.bmp')
- ]
- for filename in sorted(file_names):
- img = mmcv.imread(osp.join(cur_dir, filename))
- if file_type == 'annotations':
- img = img[:, :, 0]
- img[np.where(img == 0)] = 1
- img[np.where(img == 128)] = 2
- img[np.where(img == 255)] = 0
- mmcv.imwrite(
- img,
- osp.join(out_dir, file_type, mode,
- osp.splitext(filename)[0] + '.png'))
- def main():
- args = parse_args()
- raw_data_root = args.raw_data_root
- if args.out_dir is None:
- out_dir = osp.join('./data', 'REFUGE')
- else:
- out_dir = args.out_dir
- print('Making directories...')
- mkdir_or_exist(out_dir)
- mkdir_or_exist(osp.join(out_dir, 'images'))
- mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
- mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
- mkdir_or_exist(osp.join(out_dir, 'images', 'test'))
- mkdir_or_exist(osp.join(out_dir, 'annotations'))
- mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
- mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
- mkdir_or_exist(osp.join(out_dir, 'annotations', 'test'))
- print('Generating images and annotations...')
- # process data from the child dir on the first rank
- cur_dir, dirs, files = list(os.walk(raw_data_root))[0]
- print('====================')
- files = list(filter(lambda x: x.endswith('.zip'), files))
- with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
- for file in files:
- # search data folders for training,validation,test
- mode = list(
- filter(lambda x: file.lower().find(x) != -1,
- ['training', 'test', 'validation']))[0]
- file_root = osp.join(tmp_dir, file[:-4])
- file_type = 'images' if file.find('Anno') == -1 and file.find(
- 'GT') == -1 else 'annotations'
- extract_img(file_root, osp.join(cur_dir, file), out_dir, mode,
- file_type)
- print('Done!')
- if __name__ == '__main__':
- main()
|