import argparse

import torch
from mmcv import Config

from mmdet import __version__
from mmdet.datasets import get_dataset
from mmdet.apis import (train_detector, init_dist, get_root_logger,
                        set_random_seed)
from mmdet.models import build_detector
def parse_args():
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work_dir', help='the dir to save logs and models')
    parser.add_argument(
        '--resume_from', help='the checkpoint file to resume from')
    parser.add_argument(
        '--validate',
        action='store_true',
        help='whether to evaluate the checkpoint during training')
    parser.add_argument(
        '--gpus',
        type=int,
        default=1,
        help='number of gpus to use '
        '(only applicable to non-distributed training)')
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    return args
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus
    if cfg.checkpoint_config is not None:
        # save mmdet version in checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__, config=cfg.text)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
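Judging from the imports at the top (get_dataset, train_detector), the script presumably finishes by building the training dataset and handing everything to train_detector; a minimal sketch of that tail, with the call signature assumed from the mmdet 0.x API:

    # assumed tail of main(), inferred from the get_dataset/train_detector imports
    train_dataset = get_dataset(cfg.data.train)
    train_detector(
        model,
        train_dataset,
        cfg,
        distributed=distributed,
        validate=args.validate,
        logger=logger)


if __name__ == '__main__':
    main()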
from collections import OrderedDict

import torch
from mmcv.runner import Runner, DistSamplerSeedHook
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel

from mmdet.core import (DistOptimizerHook, DistEvalmAPHook,
                        CocoDistEvalRecallHook, CocoDistEvalmAPHook)
from mmdet.datasets import build_dataloader
from mmdet.models import RPN
from .env import get_root_logger
def parse_losses(losses):
    log_vars = OrderedDict()
    for loss_name, loss_value in losses.items():
        if isinstance(loss_value, torch.Tensor):
            log_vars[loss_name] = loss_value.mean()
        elif isinstance(loss_value, list):
            log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
        else:
            raise TypeError(
                '{} is not a tensor or list of tensors'.format(loss_name))

    loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key)

    log_vars['loss'] = loss
    for name in log_vars:
        log_vars[name] = log_vars[name].item()

    # return both the total loss and the scalar log dict (as in the upstream
    # mmdet helper)
    return loss, log_vars
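A quick illustration of what parse_losses does with a typical loss dict (the loss names and values here are made up):

# made-up example: tensors and lists of tensors are both supported
losses = dict(
    loss_cls=torch.tensor(0.8),
    loss_bbox=[torch.tensor(0.2), torch.tensor(0.3)],  # e.g. one term per level
    acc=torch.tensor(99.0))  # no 'loss' in the key: logged but not summed
total, log_vars = parse_losses(losses)
# total == 0.8 + (0.2 + 0.3) == 1.3; log_vars holds plain Python floats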
import mmcv
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from .recall import eval_recalls
def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)):
    for res_type in result_types:
        assert res_type in [
            'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
        ]

    if mmcv.is_str(coco):
        coco = COCO(coco)
    assert isinstance(coco, COCO)

    if result_types == ['proposal_fast']:
        ar = fast_eval_recall(result_file, coco, np.array(max_dets))
        for i, num in enumerate(max_dets):
            print('AR@{}\t= {:.4f}'.format(num, ar[i]))
        return
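For the other result types the function defers to pycocotools; a sketch of the remaining branch, assuming the standard COCOeval API (loadRes / evaluate / accumulate / summarize):

    # assumed remainder of coco_eval, built on the standard pycocotools API
    coco_dets = coco.loadRes(result_file)
    for res_type in result_types:
        iou_type = 'bbox' if res_type == 'proposal' else res_type
        cocoEval = COCOeval(coco, coco_dets, iou_type)
        if res_type == 'proposal':
            cocoEval.params.useCats = 0  # proposals are class-agnostic
            cocoEval.params.maxDets = list(max_dets)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()  # prints the AP/AR table described below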
Average Precision (AP):
  AP            % AP at IoU=.50:.05:.95 (primary challenge metric)
  AP^IoU=.50    % AP at IoU=.50 (PASCAL VOC metric)
  AP^IoU=.75    % AP at IoU=.75 (strict metric)
AP Across Scales:
  AP^small      % AP for small objects: area < 32^2
  AP^medium     % AP for medium objects: 32^2 < area < 96^2
  AP^large      % AP for large objects: area > 96^2
Average Recall (AR):
  AR^max=1      % AR given 1 detection per image
  AR^max=10     % AR given 10 detections per image
  AR^max=100    % AR given 100 detections per image
AR Across Scales:
  AR^small      % AR for small objects: area < 32^2
  AR^medium     % AR for medium objects: 32^2 < area < 96^2
  AR^large      % AR for large objects: area > 96^2
import argparse

import torch
import mmcv
from mmcv.runner import load_checkpoint, parallel_test, obj_from_dict
from mmcv.parallel import scatter, collate, MMDataParallel

from mmdet import datasets
from mmdet.core import results2json, coco_eval
from mmdet.datasets import build_dataloader
from mmdet.models import build_detector, detectors
def single_test(model, data_loader, show=False):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=not show, **data)
        results.append(result)

        if show:
            model.module.show_result(data, result, dataset.img_norm_cfg,
                                     dataset=dataset.CLASSES)

        batch_size = data['img'][0].size(0)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def _data_func(data, device_id):
    data = scatter(collate([data], samples_per_gpu=1), [device_id])[0]
    return dict(return_loss=False, rescale=True, **data)
def parse_args():
    parser = argparse.ArgumentParser(description='MMDet test detector')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument(
        '--gpus', default=1, type=int, help='GPU number used for testing')
    parser.add_argument(
        '--proc_per_gpu',
        default=1,
        type=int,
        help='Number of processes per GPU')
    parser.add_argument('--out', help='output result file')
    parser.add_argument(
        '--eval',
        type=str,
        nargs='+',
        choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],
        help='eval types')
    parser.add_argument('--show', action='store_true', help='show results')
    args = parser.parse_args()
    return args
def main():
    args = parse_args()

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
    if args.gpus == 1:
        model = build_detector(
            cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
        load_checkpoint(model, args.checkpoint)
        model = MMDataParallel(model, device_ids=[0])
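The single-GPU branch then wraps the dataset in a data loader and calls single_test, while the multi-GPU branch dispatches to mmcv's parallel_test using _data_func from above; a sketch of that continuation, with the signatures assumed from the mmcv 0.x API:

        # assumed continuation of the single-GPU branch
        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            num_gpus=1,
            dist=False,
            shuffle=False)
        outputs = single_test(model, data_loader, args.show)
    else:
        # assumed multi-GPU branch: one process per GPU via mmcv.parallel_test
        model_args = cfg.model.copy()
        model_args.update(train_cfg=None, test_cfg=cfg.test_cfg)
        model_type = getattr(detectors, model_args.pop('type'))
        outputs = parallel_test(
            model_type,
            model_args,
            args.checkpoint,
            dataset,
            _data_func,
            range(args.gpus),
            workers_per_gpu=args.proc_per_gpu)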
def init_weights(self, pretrained=None):
    super(SingleStageDetector, self).init_weights(pretrained)
    self.backbone.init_weights(pretrained=pretrained)
    if self.with_neck:
        if isinstance(self.neck, nn.Sequential):
            for m in self.neck:
                m.init_weights()
        else:
            self.neck.init_weights()
    self.bbox_head.init_weights()
def extract_feat(self, img):
    x = self.backbone(img)
    if self.with_neck:
        x = self.neck(x)
    return x
from .base import BaseDetector
from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin
from .. import builder
from ..registry import DETECTORS
from mmdet.core import bbox2roi, bbox2result, build_assigner, build_sampler
@property
def with_rpn(self):
    return hasattr(self, 'rpn_head') and self.rpn_head is not None
def init_weights(self, pretrained=None):
    super(TwoStageDetector, self).init_weights(pretrained)
    self.backbone.init_weights(pretrained=pretrained)
    if self.with_neck:
        if isinstance(self.neck, nn.Sequential):
            for m in self.neck:
                m.init_weights()
        else:
            self.neck.init_weights()
    if self.with_rpn:
        self.rpn_head.init_weights()
    if self.with_bbox:
        self.bbox_roi_extractor.init_weights()
        self.bbox_head.init_weights()
    if self.with_mask:
        self.mask_roi_extractor.init_weights()
        self.mask_head.init_weights()
def extract_feat(self, img):
    x = self.backbone(img)
    if self.with_neck:
        x = self.neck(x)
    return x
# assign gts and sample proposals
if self.with_bbox or self.with_mask:
    bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
    bbox_sampler = build_sampler(
        self.train_cfg.rcnn.sampler, context=self)
    num_imgs = img.size(0)
    if gt_bboxes_ignore is None:
        gt_bboxes_ignore = [None for _ in range(num_imgs)]
    sampling_results = []
    for i in range(num_imgs):
        assign_result = bbox_assigner.assign(
            proposal_list[i], gt_bboxes[i], gt_bboxes_ignore[i],
            gt_labels[i])
        sampling_result = bbox_sampler.sample(
            assign_result,
            proposal_list[i],
            gt_bboxes[i],
            gt_labels[i],
            feats=[lvl_feat[i][None] for lvl_feat in x])
        sampling_results.append(sampling_result)
# bbox head forward and loss
if self.with_bbox:
    rois = bbox2roi([res.bboxes for res in sampling_results])
    # TODO: a more flexible way to decide which feature maps to use
    bbox_feats = self.bbox_roi_extractor(
        x[:self.bbox_roi_extractor.num_inputs], rois)
    cls_score, bbox_pred = self.bbox_head(bbox_feats)
# mask head forward and loss
if self.with_mask:
    pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results])
    mask_feats = self.mask_roi_extractor(
        x[:self.mask_roi_extractor.num_inputs], pos_rois)
    mask_pred = self.mask_head(mask_feats)

    mask_targets = self.mask_head.get_target(
        sampling_results, gt_masks, self.train_cfg.rcnn)
    pos_labels = torch.cat(
        [res.pos_gt_labels for res in sampling_results])
    loss_mask = self.mask_head.loss(mask_pred, mask_targets, pos_labels)
    losses.update(loss_mask)
return losses
def simple_test(self, img, img_meta, proposals=None, rescale=False):
    """Test without augmentation."""
    assert self.with_bbox, "Bbox head must be implemented."

    x = self.extract_feat(img)

    proposal_list = self.simple_test_rpn(
        x, img_meta, self.test_cfg.rpn) if proposals is None else proposals
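The method then runs the RoI heads on those proposals and converts the detections to per-class arrays; a sketch of the assumed remainder, built on the test-mixin helpers from the imports and bbox2result (shown later):

    # assumed remainder of simple_test, using the BBox/Mask test mixins
    det_bboxes, det_labels = self.simple_test_bboxes(
        x, img_meta, proposal_list, self.test_cfg.rcnn, rescale=rescale)
    bbox_results = bbox2result(det_bboxes, det_labels,
                               self.bbox_head.num_classes)
    if not self.with_mask:
        return bbox_results
    segm_results = self.simple_test_mask(
        x, img_meta, det_bboxes, det_labels, rescale=rescale)
    return bbox_results, segm_results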
def aug_test(self, imgs, img_metas, rescale=False):
    """Test with augmentations.

    If rescale is False, then returned bboxes and masks will fit the scale
    of imgs[0].
    """
    # recompute feats to save memory
    proposal_list = self.aug_test_rpn(
        self.extract_feats(imgs), img_metas, self.test_cfg.rpn)
    det_bboxes, det_labels = self.aug_test_bboxes(
        self.extract_feats(imgs), img_metas, proposal_list,
        self.test_cfg.rcnn)
def _filter_imgs(self, min_size=32):
    """Filter images too small or without ground truths."""
    valid_inds = []
    ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
    for i, img_info in enumerate(self.img_infos):
        if self.img_ids[i] not in ids_with_ann:
            continue
        if min(img_info['width'], img_info['height']) >= min_size:
            valid_inds.append(i)
    return valid_inds
def _parse_ann_info(self, ann_info, with_mask=True):
    """Parse bbox and mask annotation.

    Args:
        ann_info (list[dict]): Annotation info of an image.
        with_mask (bool): Whether to parse mask annotations.

    Returns:
        dict: A dict containing the following keys: bboxes, bboxes_ignore,
            labels, masks, mask_polys, poly_lens.
    """
    gt_bboxes = []
    gt_labels = []
    gt_bboxes_ignore = []
    # Two formats are provided.
    # 1. mask: a binary map of the same size of the image.
    # 2. polys: each mask consists of one or several polys, each poly is a
    #    list of float.
    if with_mask:
        gt_masks = []
        gt_mask_polys = []
        gt_poly_lens = []
    for i, ann in enumerate(ann_info):
        if ann.get('ignore', False):
            continue
        x1, y1, w, h = ann['bbox']
        if ann['area'] <= 0 or w < 1 or h < 1:
            continue
        bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
        if ann['iscrowd']:
            gt_bboxes_ignore.append(bbox)
        else:
            gt_bboxes.append(bbox)
            gt_labels.append(self.cat2label[ann['category_id']])
        if with_mask:
            gt_masks.append(self.coco.annToMask(ann))
            mask_polys = [
                p for p in ann['segmentation'] if len(p) >= 6
            ]  # valid polygons have >= 3 points (6 coordinates)
            poly_lens = [len(p) for p in mask_polys]
            gt_mask_polys.append(mask_polys)
            gt_poly_lens.extend(poly_lens)
    if gt_bboxes:
        gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
        gt_labels = np.array(gt_labels, dtype=np.int64)
    else:
        gt_bboxes = np.zeros((0, 4), dtype=np.float32)
        gt_labels = np.array([], dtype=np.int64)

    ann = dict(
        bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)

    if with_mask:
        ann['masks'] = gt_masks
        # poly format is not used in the current implementation
        ann['mask_polys'] = gt_mask_polys
        ann['poly_lens'] = gt_poly_lens
    return ann
The annotation of a dataset is a list of dicts, each of which corresponds to an image. For testing, each dict has three fields: filename (a relative path), width, and height. For training, an additional field ann is required. ann is itself a dict containing at least two fields, bboxes and labels, both of which are numpy arrays. Some datasets also provide annotations such as crowd/difficult/ignored bboxes; we use bboxes_ignore and labels_ignore to cover them, as the skeleton below illustrates.
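An illustrative skeleton of that format (the filename, image sizes, and array shapes are made up):

import numpy as np

# made-up skeleton of the expected annotation format
annotations = [
    dict(
        filename='a.jpg',  # relative path
        width=1280,
        height=720,
        ann=dict(
            bboxes=np.zeros((0, 4), dtype=np.float32),         # (n, 4)
            labels=np.zeros((0, ), dtype=np.int64),            # (n, )
            bboxes_ignore=np.zeros((0, 4), dtype=np.float32),  # optional
            labels_ignore=np.zeros((0, ), dtype=np.int64))),   # optional
    # ... one dict per image
]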
You can write a new Dataset class that inherits from CustomDataset and overrides two methods, load_annotations(self, ann_file) and get_ann_info(self, idx), as CocoDataset and VOCDataset do; see the sketch after this paragraph.
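A minimal sketch of such a subclass, assuming the annotation file already stores the list-of-dicts format above (the MyDataset name and the pickle storage are illustrative):

import mmcv
from mmdet.datasets import CustomDataset


class MyDataset(CustomDataset):
    """Illustrative subclass; the file format is assumed to match the above."""

    def load_annotations(self, ann_file):
        # e.g. a pickle file holding the list of per-image dicts
        return mmcv.load(ann_file)

    def get_ann_info(self, idx):
        return self.img_infos[idx]['ann']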
offline conversion
You can convert the annotation format to the expected format above and save it to a pickle or json file, as pascal_voc.py does. Then you can simply use CustomDataset; a conversion sketch follows.
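A sketch of such an offline conversion, assuming a hypothetical CSV source with one box per row (only mmcv.dump and the target format come from the docs above; everything else is made up):

import csv

import mmcv
import numpy as np


def convert(csv_file, out_file):
    # hypothetical raw format: filename,width,height,x1,y1,x2,y2,label per row
    imgs = {}
    with open(csv_file) as f:
        for fn, w, h, x1, y1, x2, y2, label in csv.reader(f):
            info = imgs.setdefault(
                fn, dict(filename=fn, width=int(w), height=int(h),
                         bboxes=[], labels=[]))
            info['bboxes'].append([float(x1), float(y1), float(x2), float(y2)])
            info['labels'].append(int(label))
    annotations = [
        dict(
            filename=info['filename'],
            width=info['width'],
            height=info['height'],
            ann=dict(
                bboxes=np.array(info['bboxes'], dtype=np.float32),
                labels=np.array(info['labels'], dtype=np.int64)))
        for info in imgs.values()
    ]
    mmcv.dump(annotations, out_file)  # CustomDataset can then load this pickle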
Each object instance annotation contains a series of fields, including the category id and segmentation mask of the object. The segmentation format depends on whether the instance represents a single object (iscrowd=0, in which case polygons are used) or a collection of objects (iscrowd=1, in which case RLE is used). Note that a single object (iscrowd=0) may require multiple polygons, for example if occluded. Crowd annotations (iscrowd=1) are used to label large groups of objects (e.g. a crowd of people). In addition, an enclosing bounding box is provided for each object (box coordinates are measured from the top left image corner and are 0-indexed).
annotation{
    "id": int,
    "image_id": int,
    "category_id": int,
    "segmentation": RLE or [polygon],
    "area": float,
    "bbox": [x, y, width, height],
    "iscrowd": 0 or 1,
}
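A short sketch of inspecting these fields with pycocotools (the annotation path is made up):

from pycocotools.coco import COCO

coco = COCO('data/coco/annotations/instances_train2017.json')  # made-up path
ann = coco.loadAnns(coco.getAnnIds())[0]
print(ann['bbox'])          # [x, y, width, height], 0-indexed
print(ann['iscrowd'])       # 0 -> polygon segmentation, 1 -> RLE
mask = coco.annToMask(ann)  # decodes either format into a binary map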
# convert COCO's [x, y, w, h] boxes to [x1, y1, x2, y2] (inclusive corners)
for i, ann in enumerate(ann_info):
    if ann.get('ignore', False):
        continue
    x1, y1, w, h = ann['bbox']
    if ann['area'] <= 0 or w < 1 or h < 1:
        continue
    bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
def bbox2result(bboxes, labels, num_classes):
    """Convert detection results to a list of numpy arrays.

    Args:
        bboxes (Tensor): shape (n, 5)
        labels (Tensor): shape (n, )
        num_classes (int): class number, including background class

    Returns:
        list(ndarray): bbox results of each class
    """
    if bboxes.shape[0] == 0:
        return [
            np.zeros((0, 5), dtype=np.float32) for i in range(num_classes - 1)
        ]
    else:
        bboxes = bboxes.cpu().numpy()
        labels = labels.cpu().numpy()
        return [bboxes[labels == i, :] for i in range(num_classes - 1)]
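A quick illustration of the per-class list that bbox2result produces (the detections are made up):

import numpy as np
import torch

# made-up detections: [x1, y1, x2, y2, score] with class labels 0 and 2
bboxes = torch.tensor([[0., 0., 10., 10., 0.9],
                       [5., 5., 20., 20., 0.8],
                       [1., 2., 3., 4., 0.7]])
labels = torch.tensor([0, 0, 2])
results = bbox2result(bboxes, labels, num_classes=4)  # 3 foreground classes
# results[0].shape == (2, 5); results[1].shape == (0, 5); results[2].shape == (1, 5)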
# get bboxes and scores of this class; class-agnostic regression yields a
# single box per proposal (4 columns), class-specific one box per class
if multi_bboxes.shape[1] == 4:
    _bboxes = multi_bboxes[cls_inds, :]
else:
    _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
_scores = multi_scores[cls_inds, i]