Commit d3ed62db by Ting PAN

Support Mask R-CNN

1 parent 41b3932b
Showing with 2602 additions and 1652 deletions
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.3.0 (20191121)
Dragon Minimum Required (Version 0.3.0.dev20191121)
Changes:
Preview Features:
- New algorithm: Mask R-CNN.
- Add MobileNet (V2 and NAS) as backbones.
- Refactor the testing module; multi-GPU testing is now supported.
Bugs fixed:
- Remove rotated boxes, use Mask R-CNN instead.
------------------------------------------------------------------------
SeetaDet 0.2.3 (20191101)
Dragon Minimum Required (Version 0.3.0.dev20191021)
......
......@@ -12,6 +12,10 @@ while the style of codes is PyTorch.
The torch-style code helps simplify the hierarchical pipeline of modern detection.
## Requirements
seeta-dragon >= 0.3.0.dev20191121
## Installation
#### 1. Install the required python packages
......
......@@ -5,7 +5,6 @@ rm -r build install *.c *.cpp
# Compile cpp modules
python setup.py build_ext --inplace
g++ -o ../lib/utils/ctypes_rbox.so -shared -fPIC -O2 rbox.cc -std=c++11 -fopenmp
# Compile cuda modules
cd build && cmake .. && make install && cd ..
......
......@@ -41,6 +41,9 @@ __C.TRAIN.WEIGHTS = ''
# Database to train
__C.TRAIN.DATABASE = ''
# The number of workers to transform data
__C.TRAIN.NUM_WORKERS = 3
# Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)
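A rough sketch (not part of this change) of the shortest-side scaling these options imply; compute_im_scale is a hypothetical helper, while cfg.TRAIN.SCALES and cfg.TRAIN.MAX_SIZE are the options defined in this config:

import numpy as np
from lib.core.config import cfg

def compute_im_scale(im_height, im_width):
    # Pick one of the configured scales for the image's shortest side.
    target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))]
    im_scale = float(target_size) / float(min(im_height, im_width))
    # Cap the longest side at TRAIN.MAX_SIZE when it is positive.
    if cfg.TRAIN.MAX_SIZE > 0:
        im_scale = min(im_scale, float(cfg.TRAIN.MAX_SIZE) / float(max(im_height, im_width)))
    return im_scale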
......@@ -151,10 +154,10 @@ __C.TEST.SOFT_NMS_SIGMA = 0.5
# The top-k prior boxes before nms.
__C.TEST.NMS_TOP_K = 400
# The threshold for prAttrDicting boxes
# The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05
# The threshold for prAttrDicting masks
# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
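A minimal sketch of how these two test thresholds are commonly applied; the score and mask arrays below are illustrative placeholders, not code from this commit:

import numpy as np
from lib.core.config import cfg

scores = np.array([0.92, 0.04, 0.61], 'float32')           # per-detection scores (example values)
mask_probs = np.random.rand(3, 28, 28).astype('float32')   # soft masks from a mask head
keep = np.where(scores >= cfg.TEST.SCORE_THRESH)[0]         # keep confident detections
binary_masks = (mask_probs[keep] >= cfg.TEST.BINARY_THRESH).astype('uint8')  # binarize their masks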
# NMS threshold used on RPN proposals
......@@ -192,8 +195,9 @@ __C.MODEL = AttrDict()
# The type of the model
# ('faster_rcnn',
# 'ssd',
# 'mask_rcnn',
# 'retinanet',
# 'ssd',
# )
__C.MODEL.TYPE = ''
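For illustration only (values normally come from a config file rather than being set in code), selecting the newly listed model type might look like this; the test driver in this commit resolves the type via importlib to lib.<TYPE>.test:

from lib.core.config import cfg

cfg.MODEL.TYPE = 'mask_rcnn'   # picks lib.mask_rcnn.test in run_test_net below
cfg.MODEL.NUM_CLASSES = 81     # e.g. 80 COCO classes + background (illustrative value)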
......@@ -361,14 +365,14 @@ __C.SSD.NUM_CONVS = 0
# Weight for bbox regression loss
__C.SSD.BBOX_REG_WEIGHT = 1.
__C.SSD.MULTIBOX = AttrDict()
# MultiBox configs
__C.SSD.MULTIBOX = AttrDict()
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
__C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []
# OHEM configs
__C.SSD.OHEM = AttrDict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
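A hedged sketch of how such a negative-overlap threshold is typically used in hard example mining; mine_hard_negatives, max_overlaps, and cls_loss are assumptions for illustration, not code from this commit:

import numpy as np
from lib.core.config import cfg

def mine_hard_negatives(max_overlaps, cls_loss, num_keep):
    # Candidates: prior boxes whose best IoU with any ground-truth box is below NEG_OVERLAP.
    neg_inds = np.where(max_overlaps < cfg.SSD.OHEM.NEG_OVERLAP)[0]
    # Keep the highest-loss candidates, i.e. the "hard" negatives.
    order = np.argsort(-cls_loss[neg_inds])
    return neg_inds[order[:num_keep]]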
......
......@@ -21,46 +21,56 @@ import cv2
import dragon
from lib.core.config import cfg
from lib.datasets.example import Example
from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data_transformer import DataTransformer
class TestServer(object):
class _Server(object):
def __init__(self, output_dir):
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.num_images, self.num_classes, self.classes = \
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_transformer = DataTransformer()
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
self.output_dir = output_dir
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def evaluate_detections(self, all_boxes):
pass
def evaluate_segmentations(self, all_boxes, all_masks):
pass
def get_image(self):
example = self.data_reader.q_out.get()
image = self.data_transformer.get_image(example)
image_id, objects = self.data_transformer.get_annotations(example)
self.gt_recs[image_id] = {
'objects': objects,
'width': image.shape[1],
'height': image.shape[0],
}
return image_id, image
pass
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
class TestServer(_Server):
def __init__(self, output_dir):
super(TestServer, self).__init__(output_dir)
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.classes = self.imdb.classes
self.num_images = self.imdb.num_images
self.num_classes = self.imdb.num_classes
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
def get_image(self):
example = Example(self.data_reader.q_out.get())
image, image_id = example.image, example.id
self.gt_recs[image_id] = {
'height': example.height,
'width': example.width,
'objects': example.objects,
}
return image_id, image
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
......@@ -70,7 +80,7 @@ class TestServer(object):
return self.gt_recs
def evaluate_detections(self, all_boxes):
if cfg.TEST.PROTOCOL == 'null':
if cfg.TEST.PROTOCOL == 'dump':
self.imdb.dump_detections(all_boxes, self.output_dir)
else:
self.imdb.evaluate_detections(
......@@ -88,56 +98,20 @@ class TestServer(object):
)
class InferServer(object):
class InferServer(_Server):
def __init__(self, output_dir):
super(InferServer, self).__init__(output_dir)
self.images_dir = cfg.TEST.DATABASE
self.imdb = get_imdb('taas:/empty')
self.images = os.listdir(self.images_dir)
self.num_images, self.num_classes, self.classes = \
len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES
self.data_transformer = DataTransformer()
self.gt_recs = collections.OrderedDict()
self.classes = cfg.MODEL.CLASSES
self.num_images = len(self.images)
self.num_classes = cfg.MODEL.NUM_CLASSES
self.output_dir = output_dir
self.image_idx = 0
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
image_name = self.images[self.image_idx]
image_id = image_name.split('.')[0]
image = cv2.imread(os.path.join(self.images_dir, image_name))
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = {'width': image.shape[1], 'height': image.shape[0]}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
'Loading {} records, while {} required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import importlib
import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.utils import time_util
from lib.utils.vis import vis_one_image
def run_test_net(checkpoint, server, devices):
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
devices = devices if devices else [cfg.GPU_ID]
num_workers = len(devices)
test_fn = importlib.import_module(
'lib.%s.test' % cfg.MODEL.TYPE).test_net
_t = time_util.new_timers('im_detect', 'mask_detect', 'misc')
vis_image_dict = {}
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
all_masks = [[[] for _ in range(num_images)] for _ in range(num_classes)]
queues = [
multiprocessing.Queue()
for _ in range(num_workers + 1)
]
workers = [
multiprocessing.Process(
target=test_fn,
kwargs={
'weights': checkpoint,
'num_classes': server.num_classes,
'q_in': queues[i],
'q_out': queues[-1],
'device': devices[i],
}
) for i in range(num_workers)
]
for process in workers:
process.start()
for i in range(num_images):
image_id, raw_image = server.get_image()
queues[i % num_workers].put((i, raw_image))
# Hold the image until the visualization
if cfg.VIS or cfg.VIS_ON_FILE:
vis_image_dict[i] = (image_id, raw_image)
for i in range(num_workers):
queues[i].put((-1, None))
for count in range(num_images):
i, time_diffs, results = queues[-1].get()
# Unpack the diverse results
boxes_this_image = results['boxes']
masks_this_image = results.get('masks', None)
# Disable some collections
if masks_this_image is None:
all_masks = None
# Update time difference
for name, diff in time_diffs.items():
_t[name].add_diff(diff)
# Visualize the results if necessary
if cfg.VIS or cfg.VIS_ON_FILE:
image_id, raw_image = vis_image_dict[i]
vis_one_image(
raw_image,
classes,
boxes_this_image,
masks_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_id),
)
del vis_image_dict[i]
_t['misc'].tic()
# Pack the results in the class-major order
for j in range(1, num_classes):
all_boxes[j][i] = boxes_this_image[j]
if all_masks is not None:
if j < len(masks_this_image):
all_masks[j][i] = masks_this_image[j]
# Limit to max_per_image detections *over all classes*
max_detections = cfg.TEST.DETECTIONS_PER_IM
if max_detections > 0:
scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
scores.append(all_boxes[j][i][:, -1])
if len(scores) > 0:
scores = np.hstack(scores)
if len(scores) > max_detections:
thr = np.sort(scores)[-max_detections]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= thr)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
if all_masks is not None:
all_masks[j][i] = all_masks[j][i][keep]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s|{:.3f}s {:.3f}s'
.format(count + 1, num_images,
_t['im_detect'].average_time,
_t['mask_detect'].average_time,
_t['misc'].average_time),
end='')
print('\n\n>>> Evaluating detections\n')
server.evaluate_detections(all_boxes)
if all_masks is not None:
print('>>> Evaluating segmentations\n')
server.evaluate_segmentations(all_boxes, all_masks)
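A minimal usage sketch for the refactored multi-GPU testing entry; module paths and the checkpoint/output paths are assumptions, while the call signatures are the ones defined above:

from lib.core.test import TestServer            # module path assumed
from lib.core.test_engine import run_test_net   # module path assumed

server = TestServer(output_dir='/path/to/results')
run_test_net('/path/to/checkpoint', server, devices=[0, 1])  # one worker process per device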
......@@ -31,9 +31,9 @@ from lib.utils.stats import SmoothedValue
class SolverWrapper(object):
def __init__(self, coordinator):
self.output_dir = coordinator.checkpoints_dir()
self.solver = SGDSolver()
self.detector = self.solver.detector
self.output_dir = coordinator.checkpoints_dir()
# Setup the detector
self.detector.load_weights(cfg.TRAIN.WEIGHTS)
......@@ -89,7 +89,6 @@ class SolverWrapper(object):
display = self.solver.iter % cfg.SOLVER.DISPLAY == 0
stats = self.solver.one_step()
self.add_metrics(stats)
self.send_metrics(stats)
if display:
logger.info(
......@@ -104,6 +103,7 @@ class SolverWrapper(object):
continue
logger.info(' ' * 10 + 'Train net output({}): {}'
.format(k, v.GetMedianValue()))
self.send_metrics(stats)
def train_model(self):
"""Network training loop."""
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
from lib.pycocotools import mask_utils
class Example(object):
"""Wrapper for annotated example."""
def __init__(self, datum):
"""Create a ``Example``.
Parameters
----------
datum : Dict
            The data loaded from the dataset.
"""
self._datum = datum
@property
def id(self):
"""Return the example id.
Returns
-------
str
The unique id.
"""
return self._datum['id']
@property
def image(self):
"""Return the image data.
Returns
-------
numpy.ndarray
The image.
"""
img = np.frombuffer(self._datum['content'], 'uint8')
return cv2.imdecode(img, 3)
@property
def height(self):
"""Return the image height.
Returns
-------
int
The height of image.
"""
return self._datum['height']
@property
def objects(self):
"""Return the annotated objects.
Returns
-------
Sequence[Dict]
The objects.
"""
objects = []
for ix, obj in enumerate(self._datum['object']):
mask = obj.get('mask', None)
if 'x3' in obj:
poly = np.array([
obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
obj['x4'], obj['y4']
], 'float32')
x, y, w, h = cv2.boundingRect(
poly.reshape((-1, 2)))
bbox = [x, y, x + w, y + h]
mask = mask_utils.poly2bytes(
[poly],
self._datum['height'],
self._datum['width'],
)
elif 'x2' in obj:
bbox = [obj['x1'], obj['y1'], obj['x2'], obj['y2']]
elif 'xmin' in obj:
bbox = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']]
else:
bbox = obj['bbox']
objects.append({
'name': obj['name'],
'bbox': bbox,
'mask': mask,
'difficult': obj.get('difficult', 0),
})
return objects
@property
def width(self):
"""Return the image width.
Returns
-------
int
The width of image.
"""
return self._datum['width']
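A short usage sketch for the new Example wrapper, assuming datum is one record read from a dragon.io.SeetaRecordDataset (as in TestServer.get_image):

from lib.datasets.example import Example

example = Example(datum)
image = example.image                       # decoded numpy.ndarray
print(example.id, example.height, example.width)
for obj in example.objects:                 # normalized dicts: name, bbox, mask, difficult
    print(obj['name'], obj['bbox'])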
......@@ -13,84 +13,118 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
import dragon
import uuid
from lib.core.config import cfg
from lib.datasets.coco_evaluator import COCOEvaluator
from lib.datasets.voc_evaluator import VOCEvaluator
class imdb(object):
def __init__(self, name):
self._name = name
self._num_classes = 0
self._classes = []
@property
def name(self):
return self._name
def __init__(self, source):
self._source = source
self._num_images = 0
self._classes = cfg.MODEL.CLASSES
self._class_to_ind = self._class_to_cat_id = \
dict(zip(self.classes, range(self.num_classes)))
self._salt = str(uuid.uuid4())
self.config = {'cleanup': True, 'use_salt': True}
@property
def num_classes(self):
return len(self._classes)
def cache_path(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
@property
def classes(self):
return self._classes
@property
def cache_path(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
def class_to_ind(self):
return self._class_to_ind
@property
def source(self):
        expected_source = os.path.join(self.cache_path, self.name)
        if not os.path.exists(expected_source):
            raise RuntimeError(
                'Expected source at: {}, '
                'but it does not exist.'
                .format(expected_source)
            )
        return expected_source
def comp_id(self):
return '_' + self._salt if self.config['use_salt'] else ''
@property
def num_classes(self):
return len(self._classes)
@property
def num_images(self):
return dragon.io.SeetaRecordDataset(self.source).size
return self._num_images
@property
def source(self):
return self._source
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
def dump_detections(self, all_boxes, output_dir):
dataset = dragon.io.SeetaRecordDataset(self.source)
for file in ('data.data', 'data.index', 'data.meta'):
file = os.path.join(output_dir, file)
if os.path.exists(file):
os.remove(file)
writer = dragon.io.SeetaRecordWriter(output_dir, dataset.protocol)
for i in range(len(dataset)):
example = dataset.get()
example['object'] = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][i]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
example['object'].append({
'name': cls,
'xmin': float(detections[k][0]),
'ymin': float(detections[k][1]),
'xmax': float(detections[k][2]),
'ymax': float(detections[k][3]),
'difficult': 0,
})
writer.write(example)
pass
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
pass
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
evaluator = VOCEvaluator(self)
evaluator.write_bbox_results(all_boxes, gt_recs, output_dir)
if '!' not in protocol:
for ovr in (0.5, 0.7):
evaluator.do_bbox_eval(
gt_recs,
output_dir,
iou=ovr,
use_07_metric='2007' in protocol,
)
elif 'coco' in protocol:
ann_file = cfg.TEST.JSON_FILE
evaluator = COCOEvaluator(self, ann_file)
if evaluator.coco is None:
ann_file = evaluator \
.write_bbox_annotations(
gt_recs, output_dir)
evaluator = COCOEvaluator(self, ann_file)
res_file = evaluator.write_bbox_results(
all_boxes, gt_recs, output_dir)
if '!' not in protocol:
evaluator.do_bbox_eval(res_file)
def evaluate_masks(self, all_boxes, all_masks, output_dir):
pass
def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
evaluator = VOCEvaluator(self)
evaluator.write_segm_results(all_boxes, all_masks, output_dir)
if '!' not in protocol:
for ovr in (0.5, 0.7):
evaluator.do_segm_eval(
gt_recs,
output_dir,
iou=ovr,
use_07_metric='2007' in protocol,
)
elif 'coco' in protocol:
ann_file = cfg.TEST.JSON_FILE
evaluator = COCOEvaluator(self, ann_file)
if evaluator.coco is None:
ann_file = evaluator \
.write_segm_annotations(
gt_recs, output_dir)
evaluator = COCOEvaluator(self, ann_file)
res_file = evaluator.write_segm_results(
all_boxes, all_masks, gt_recs, output_dir)
if '!' not in protocol:
evaluator.do_segm_eval(res_file)
......@@ -20,15 +20,10 @@ from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle
except:
import pickle as cPickle
from lib.core.config import cfg
from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils import rotated_boxes
from lib.utils.boxes import expand_boxes
from lib.pycocotools import mask_utils
from lib.utils import boxes as box_util
from lib.utils.framework import pickle
from lib.utils.mask import mask_overlap
......@@ -66,15 +61,15 @@ def voc_bbox_eval(
det_file,
gt_recs,
cls_name,
IoU=0.5,
iou=0.5,
use_07_metric=False,
):
class_recs, n_pos = {}, 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
diff = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
objects = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in objects])
diff = np.array([x['difficult'] for x in objects]).astype(np.bool)
det = [False] * len(objects)
n_pos = n_pos + sum(~diff)
class_recs[image_name] = {'bbox': bbox, 'difficult': diff, 'det': det}
......@@ -100,7 +95,7 @@ def voc_bbox_eval(
nd = len(image_ids)
tp, fp = np.zeros(nd), np.zeros(nd)
def overlaps4(bb, BBGT):
def compute_overlaps(bb, BBGT):
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
......@@ -114,9 +109,6 @@ def voc_bbox_eval(
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
return inters / uni
def overlaps5(bb, BBGT):
return rotated_boxes.bbox_overlaps(bb.reshape((1, 5)), BBGT)[0]
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
......@@ -124,12 +116,11 @@ def voc_bbox_eval(
BBGT = R['bbox'].astype(float)
if BBGT.size > 0:
overlaps = overlaps4(bb, BBGT) \
if len(bb) == 4 else overlaps5(bb, BBGT)
overlaps = compute_overlaps(bb, BBGT)
ov_max = np.max(overlaps)
j_max = np.argmax(overlaps)
if ov_max > IoU:
if ov_max > iou:
if not R['difficult'][j_max]:
if not R['det'][j_max]:
tp[d] = 1.
......@@ -154,23 +145,29 @@ def voc_segm_eval(
seg_file,
gt_recs,
cls_name,
IoU=0.5,
iou=0.5,
use_07_metric=False,
):
# 0. Constants
M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH
scale = (M + 2.0) / M
scale = (M + 2.) / M
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
# 1. Get bbox & mask ground truths
image_names, class_recs, n_pos = [], {}, 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
objects = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in objects])
mask = np.array([
mask_utils.bytes2img(
x['mask'],
rec['height'],
rec['width']
) for x in objects]
)
difficult = np.array([x['difficult'] for x in objects]).astype(np.bool)
det = [False] * len(objects)
n_pos = n_pos + sum(~difficult)
class_recs[image_name] = {
'bbox': bbox,
......@@ -182,9 +179,9 @@ def voc_segm_eval(
# 2. Get predict pickle file for this class
with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f)
boxes_pkl = pickle.load(f)
with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f)
masks_pkl = pickle.load(f)
# 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs)
......@@ -222,7 +219,7 @@ def voc_segm_eval(
fp = np.zeros((num_pred, 1))
tp = np.zeros((num_pred, 1))
ref_boxes = expand_boxes(new_boxes, scale)
ref_boxes = box_util.expand_boxes(new_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
for i in range(num_pred):
......@@ -261,13 +258,19 @@ def voc_segm_eval(
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
ov = \
mask_overlap(
gt_mask_bound,
pred_mask_bound,
crop_mask,
pred_mask,
)
if ov > ovmax:
ovmax = ov
jmax = j
if ovmax > IoU:
if ovmax > iou:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[i] = 1.
......@@ -281,7 +284,7 @@ def voc_segm_eval(
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(n_pos)
# avoid divide by zero in case the first matches a difficult gt
# Avoid divide by zero in case the first matches a difficult gt
prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric=use_07_metric)
return ap
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
from lib.datasets import voc_eval
from lib.utils.framework import pickle
class VOCEvaluator(object):
def __init__(self, imdb):
self.imdb = imdb
def do_bbox_eval(
self,
gt_recs,
output_dir,
iou=0.5,
use_07_metric=True,
):
aps = []
print('~~~~~~ Evaluation IoU@%s ~~~~~~' % str(iou))
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
det_file = self.get_results_file(output_dir).format(cls)
rec, prec, ap = \
voc_eval.voc_bbox_eval(
det_file,
gt_recs, cls,
iou=iou,
use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
def do_segm_eval(
self,
gt_recs,
output_dir,
iou=0.5,
use_07_metric=True,
):
aps = []
print('~~~~~~ Evaluation IoU@%s ~~~~~~' % str(iou))
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
segm_filename = self.get_results_file(output_dir, 'segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
ap = voc_eval.voc_segm_eval(
bbox_filename,
segm_filename,
gt_recs, cls,
iou=iou,
use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
@staticmethod
def get_prefix(type='bbox'):
if type == 'bbox':
return 'detections'
elif type == 'segm':
return 'segmentations'
elif type == 'kpt':
return 'keypoints'
return ''
def get_results_file(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_<comp_id>_<class_name>.txt
if type == 'bbox':
filename = self.get_prefix(type) + self.imdb.comp_id + '_{:s}.txt'
elif type == 'segm':
filename = self.get_prefix(type) + self.imdb.comp_id + '_{:s}.pkl'
else:
raise ValueError('Type of results can be either bbox or segm.')
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def write_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
print('Writing {} VOC format bbox results'.format(cls))
filename = self.get_results_file(output_dir).format(cls)
with open(filename, 'wt') as f:
ix = 0
for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]
ix += 1
if len(dets) == 0:
continue
for k in range(dets.shape[0]):
content = '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}' \
.format(image_id, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1,
dets[k, 2] + 1, dets[k, 3] + 1)
if dets.shape[1] == 6:
content += ' {:.2f}'.format(dets[k, 4])
f.write(content + '\n')
def write_segm_results(self, all_boxes, all_masks, output_dir):
for cls_inds, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self.get_results_file(output_dir, 'segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
with open(bbox_filename, 'wb') as f:
pickle.dump(all_boxes[cls_inds], f, pickle.HIGHEST_PROTOCOL)
with open(segm_filename, 'wb') as f:
pickle.dump(all_masks[cls_inds], f, pickle.HIGHEST_PROTOCOL)
......@@ -13,7 +13,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.data_layer import DataLayer
from lib.faster_rcnn.proposal_layer import ProposalLayer
from lib.faster_rcnn.proposal_target_layer import ProposalTargetLayer
from lib.faster_rcnn.anchor_target import AnchorTarget
from lib.faster_rcnn.data_loader import DataLoader
from lib.faster_rcnn.proposal import Proposal
from lib.faster_rcnn.proposal_target import ProposalTarget
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.faster_rcnn.utils import map_returns_to_blobs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
class AnchorTarget(object):
"""Assign ground-truth targets to anchors."""
def __init__(self):
super(AnchorTarget, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
self.num_strides = len(self.strides)
self.allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = []
for i in range(self.num_strides):
self.base_anchors.append(
generate_anchors(
self.strides[i],
self.ratios,
np.array([self.scales[i]])
if self.num_strides > 1
else np.array(self.scales)
)
)
def __call__(self, features, gt_boxes, ims_info):
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Generate grid anchors from base
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
num_anchors = all_anchors.shape[0]
        # Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
labels_wide = -np.ones((num_images, num_anchors,), 'float32')
bbox_targets_wide = np.zeros((num_images, num_anchors, 4), 'float32')
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, ...)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self.allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self.allowed_border) &
(all_anchors[:, 1] >= -self.allowed_border) &
(all_anchors[:, 2] < im_info[1] + self.allowed_border) &
(all_anchors[:, 3] < im_info[0] + self.allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside, anchors = np.arange(num_anchors), all_anchors
num_inside = len(inds_inside)
labels = np.empty((num_inside,), 'float32')
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = box_util.bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(fg_inds, len(fg_inds) - num_fg, False)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(bg_inds, len(bg_inds) - num_bg, False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), 'float32')
bbox_targets[fg_inds, :] = \
box_util.bbox_transform(
anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), 'float32')
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), 'float32')
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
if self.num_strides > 1:
labels = labels_wide.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
else:
A = self.base_anchors[0].shape[0]
height, width = features[0].shape[-2:]
labels = labels_wide \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = bbox_inside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = bbox_outside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': new_tensor(labels),
'bbox_targets': new_tensor(bbox_targets),
'bbox_inside_weights': new_tensor(bbox_inside_weights),
'bbox_outside_weights': new_tensor(bbox_outside_weights),
}
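A brief usage sketch for AnchorTarget; the input arrays are placeholders described only by shape, while the call signature and returned keys follow __call__ above:

anchor_target = AnchorTarget()
# features: sequence of feature maps; gt_boxes: packed (N, >=5) boxes with class labels;
# ims_info: one (height, width, scale) row per image.
blobs = anchor_target(features, gt_boxes, ims_info)
rpn_labels = blobs['labels']              # 1 = fg, 0 = bg, -1 = ignored
rpn_bbox_targets = blobs['bbox_targets']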
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
# C4 backbone takes the first stride
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
height, width = features[0].shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
all_anchors = (self.base_anchors.reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
        # label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels first so that positive labels can clobber them
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels last so that negative labels can clobber positives
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
all_labels[ix, inds_inside] = labels # label
all_bbox_targets[ix, inds_inside] = bbox_targets
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
labels = all_labels \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors))
bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
......@@ -27,11 +27,11 @@ from lib.utils import logger
from lib.utils.blob import im_list_to_blob
class DataLayer(torch.nn.Module):
"""Generate a mini-batch of data."""
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLayer, self).__init__()
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
......@@ -39,12 +39,11 @@ class DataLayer(torch.nn.Module):
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def forward(self):
# Get an array blob from the Queue
def __call__(self):
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
......@@ -59,14 +58,16 @@ class DataBatch(mp.Process):
----------
dataset : lambda
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
prefetch : int, optional, default=5
The prefetch count.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
......@@ -83,20 +84,10 @@ class DataBatch(mp.Process):
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Io-Aware Policy
if self._num_transformers == -1:
self._num_transformers = 2
# Add 1 transformer for color augmentation
if cfg.TRAIN.USE_COLOR_JITTER:
self._num_transformers += 1
self._num_transformers = min(
self._num_transformers, self._max_transformers)
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
......
......@@ -19,9 +19,9 @@ import cv2
import numpy as np
from lib.core.config import cfg
from lib.utils import rotated_boxes
from lib.datasets.example import Example
from lib.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob
from lib.utils.boxes import flip_boxes
from lib.utils.image import get_image_with_target_size
......@@ -44,32 +44,32 @@ class DataTransformer(multiprocessing.Process):
apply_flip=False,
offsets=None,
):
n_objects = 0
objects, n_objects = example.objects, 0
height, width = example.height, example.width
if not self._use_diff:
for obj in example['object']:
for obj in objects:
if obj.get('difficult', 0) == 0:
n_objects += 1
else:
n_objects = len(example['object'])
n_objects = len(objects)
roi_dict = {
'width': example['width'],
'height': example['height'],
'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), 'float32'),
'gt_classes': np.zeros((n_objects,), 'int32'),
}
# Filter the difficult instances
object_idx = 0
for obj in example['object']:
for obj in objects:
if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue
bbox = obj['bbox']
roi_dict['boxes'][object_idx, :] = [
max(0, obj['xmin']),
max(0, obj['ymin']),
min(obj['xmax'], example['width'] - 1),
min(obj['ymax'], example['height'] - 1),
max(0, bbox[0]),
max(0, bbox[1]),
min(bbox[2], width - 1),
min(bbox[3], height - 1),
]
roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']]
......@@ -77,8 +77,11 @@ class DataTransformer(multiprocessing.Process):
# Flip the boxes if necessary
if apply_flip:
roi_dict['boxes'] = flip_boxes(
roi_dict['boxes'], roi_dict['width'])
roi_dict['boxes'] = \
box_util.flip_boxes(
roi_dict['boxes'],
width,
)
# Scale the boxes to the detecting scale
roi_dict['boxes'] *= im_scale
......@@ -94,61 +97,32 @@ class DataTransformer(multiprocessing.Process):
return roi_dict
@classmethod
def get_image(cls, example):
img = np.frombuffer(example['content'], np.uint8)
return cv2.imdecode(img, -1)
@classmethod
def get_annotations(cls, example):
objects = []
for ix, obj in enumerate(example['object']):
if 'x3' in obj:
bbox = rotated_boxes.vertices2box(
[obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
obj['x4'], obj['y4']]
)
elif 'x2' in obj:
bbox = [obj['x1'], obj['y1'], obj['x2'], obj['y2']]
elif 'xmin' in obj:
bbox = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']]
else:
bbox = obj['bbox']
objects.append({
'name': obj['name'],
'difficult': obj.get('difficult', 0),
'bbox': bbox,
})
return example['id'], objects
def get(self, example):
img = np.frombuffer(example['content'], np.uint8)
img = cv2.imdecode(img, 1)
example = Example(example)
img = example.image
# Scale
scale_indices = np.random.randint(len(cfg.TRAIN.SCALES))
target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(img, target_size, cfg.TRAIN.MAX_SIZE)
max_size = cfg.TRAIN.MAX_SIZE
target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))]
img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
# Flip
apply_flip = False
if self._use_flipped:
if np.random.randint(2) > 0:
im = im[:, ::-1, :]
img = img[:, ::-1]
apply_flip = True
# Random Crop or RandomPad
offsets = None
if cfg.TRAIN.MAX_SIZE > 0:
if jitter != 1.0:
if jitter != 1:
# To a rectangle (scale, max_size)
target_size = (np.array(im.shape[0:2]) / jitter).astype(np.int)
im, offsets = get_image_with_target_size(target_size, im)
target_size = (np.array(img.shape[:2]) / jitter).astype(np.int32)
img, offsets = get_image_with_target_size(target_size, img)
else:
# To a square (target_size, target_size)
im, offsets = get_image_with_target_size([target_size] * 2, im)
img, offsets = get_image_with_target_size([target_size] * 2, img)
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets)
......@@ -158,7 +132,7 @@ class DataTransformer(multiprocessing.Process):
gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
return im, im_scale, gt_boxes
return img, im_scale, gt_boxes
def run(self):
# Fix the process-local random seed
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.nms import nms_wrapper
from lib.utils import boxes as box_util
class Proposal(object):
"""Compute proposals by applying transformations anchors."""
def __init__(self):
super(Proposal, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
self.num_strides = len(self.strides)
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
])
# Generate base anchors
self.base_anchors = []
for i in range(self.num_strides):
self.base_anchors.append(
generate_anchors(
self.strides[i],
self.ratios,
np.array([self.scales[i]])
if self.num_strides > 1
else np.array(self.scales)
)
)
def __call__(self, features, cls_prob, bbox_pred, ims_info):
pre_nms_top_n = cfg.TRAIN.RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg.TRAIN.RPN_POST_NMS_TOP_N
nms_thresh = cfg.TRAIN.RPN_NMS_THRESH
min_size = cfg.TRAIN.RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
# Prepare for the outputs
batch_rois = []
cls_prob = cls_prob.numpy(True)
bbox_pred = bbox_pred.numpy(True)
if self.num_strides > 1:
# (?, 4, A * K) -> (?, A * K, 4)
bbox_pred = bbox_pred.transpose((0, 2, 1))
else:
# (?, A * 4, H, W) -> (?, H, W, A * 4)
cls_prob = cls_prob.transpose((0, 2, 3, 1))
bbox_pred = bbox_pred.transpose((0, 2, 3, 1))
# Extract RoIs separately
for ix in range(num_images):
# [?, N] -> [? * N, 1]
scores = cls_prob[ix].reshape((-1, 1))
if self.num_strides > 1:
deltas = bbox_pred[ix]
else:
deltas = bbox_pred[ix].reshape((-1, 4))
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# Convert anchors into proposals via bbox transformations
proposals = box_util.bbox_transform_inv(anchors, deltas)
# Clip predicted boxes to image
proposals = box_util.clip_tiled_boxes(proposals, ims_info[ix, :2])
# Remove predicted boxes with either height or width < threshold
keep = box_util.filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# Apply nms (e.g. threshold = 0.7)
# Take after_nms_topN (e.g. 300)
# Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Attach RoIs with batch indices
batch_inds = np.empty((proposals.shape[0], 1), 'float32')
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype('float32', copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
return np.concatenate(batch_rois, 0)
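And a matching usage sketch for Proposal; the inputs are placeholders, while the signature mirrors __call__ above:

proposal = Proposal()
# cls_prob / bbox_pred: RPN outputs; ims_info: one (height, width, scale) row per image.
rois = proposal(features, cls_prob, bbox_pred, ims_info)  # (R, 5): batch_ind, x1, y1, x2, y2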
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms import nms_wrapper
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Compute proposals by applying transformations to anchors."""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
# Generate proposals from shifted anchors
height, width = cls_prob.shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
anchors = \
self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4))
# Prepare for the outputs
batch_rois = []
# scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_top_n (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
return [array2tensor(rpn_rois)]
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_returns_to_blobs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
class ProposalTarget(object):
"""Assign ground-truth targets to proposals."""
def __init__(self):
super(ProposalTarget, self).__init__()
self.num_strides = len(cfg.RPN.STRIDES)
self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
])
def __call__(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = self.defaults.keys()
blobs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype('int32') == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, :4]))))
# Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs(
sample_rois(
rois,
gt_boxes,
rois_per_image,
fg_rois_per_image,
self.num_classes,
), blobs, keys,
)
# Stack into continuous blobs
for k, v in blobs.items():
blobs[k] = np.concatenate(blobs[k], 0)
if self.num_strides > 1:
# Distribute RoIs into pyramids
min_lvl = cfg.FPN.ROI_MIN_LEVEL
max_lvl = cfg.FPN.ROI_MAX_LEVEL
k = max_lvl - min_lvl + 1
levels = map_rois_to_levels(blobs['rois'], min_lvl, max_lvl)
outputs = map_blobs_to_outputs(
blobs,
self.defaults,
[np.where(levels == (i + min_lvl))[0] for i in range(k)],
)
return {
'rois': [new_tensor(outputs['rois'][i]) for i in range(k)],
'labels': new_tensor(np.concatenate(outputs['labels'], 0)),
'bbox_targets': new_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': new_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': new_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
else:
# Return RoIs directly for CX-stride
return {
'rois': [new_tensor(blobs['rois'])],
'labels': new_tensor(blobs['labels']),
'bbox_targets': new_tensor(blobs['bbox_targets']),
'bbox_inside_weights': new_tensor(blobs['bbox_inside_weights']),
'bbox_outside_weights': new_tensor(blobs['bbox_outside_weights']),
}
def get_targets(ex_rois, gt_rois, gt_labels, num_classes):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0]
targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = np.zeros((ex_rois.shape[0], 4 * num_classes), 'float32')
inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
for i in fg_inds:
start = int(4 * gt_labels[i])
bbox_targets[i, start:start + 4] = targets[i]
inside_weights[i, start:start + 4] = (1., 1., 1., 1.)
outside_weights = np.array(inside_weights > 0).astype('float32')
return bbox_targets, inside_weights, outside_weights
def sample_rois(
all_rois,
gt_boxes,
num_rois,
num_fg_rois,
num_classes,
):
"""Sample a batch of RoIs comprising foreground and background examples."""
overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image
bg_rois_per_this_image = num_rois - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, bg_rois_per_this_image, False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays
rois, labels = all_rois[keep_inds], labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
# Clamp the image indices for the background RoIs to -1
rois[fg_rois_per_this_image:, 0] = -1
# Compute the target from RoIs
outputs = [rois, labels]
outputs += get_targets(
rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4],
labels,
num_classes,
)
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets."""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
batch_outputs = {key: [] for key in keys}
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return {
'rois': [array2tensor(batch_outputs['rois'])],
'labels': array2tensor(batch_outputs['labels']),
'bbox_targets': array2tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': array2tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': array2tensor(batch_outputs['bbox_outside_weights']),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th).
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(
all_rois,
gt_boxes,
fg_rois_per_image,
rois_per_image,
num_classes,
):
"""Generate a random sample of RoIs."""
overlaps = bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
......@@ -17,14 +17,13 @@ import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper
from lib.utils import boxes as box_util
from lib.utils import framework
from lib.utils import time_util
from lib.utils.blob import im_list_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
......@@ -39,69 +38,65 @@ def im_detect(detector, raw_image):
], dtype=np.float32)
# Do Forward
if not hasattr(detector, 'frozen_graph'):
inputs = {
'data': torch.from_numpy(blobs['data']),
'ims_info': torch.from_numpy(blobs['ims_info']),
}
if not hasattr(detector, 'graph'):
with framework.new_workspace().as_default():
data = torch.from_numpy(blobs['data'])
ims_info = torch.from_numpy(blobs['ims_info'])
with torch.no_grad():
with torch.jit.Recorder(retain_ops=True):
with torch.jit.Tracer(retain_ops=True):
inputs = {'data': data, 'ims_info': ims_info}
outputs = detector.forward(inputs)
detector.frozen_graph = \
framework.FrozenGraph(
{'data': inputs['data'],
'ims_info': inputs['ims_info']},
{'rois': outputs['rois'],
detector.graph = \
framework.Graph(inputs, {
'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']},
)
outputs = detector.frozen_graph(**blobs)
'bbox_pred': outputs['bbox_pred']
})
outputs = detector.graph(**blobs)
# Decode results
batch_rois = outputs['rois']
batch_scores = outputs['cls_prob']
batch_deltas = outputs['bbox_pred']
batch_boxes = bbox_transform_inv(
batch_rois[:, 1:5],
batch_deltas,
rois = outputs['rois']
scores, boxes, batch_inds = [], [], []
pred_boxes = \
box_util.bbox_transform_inv(
rois[:, 1:5],
outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS,
)
scores_wide, boxes_wide = [], []
for i in range(len(ims)):
inds = np.where(rois[:, 0].astype(np.int32) == i)[0]
im_boxes = pred_boxes[inds] / ims_scale[i]
scores.append(outputs['cls_prob'][inds])
boxes.append(box_util.clip_tiled_boxes(im_boxes, raw_image.shape))
for im_idx in range(len(ims)):
indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0]
boxes = batch_boxes[indices]
boxes /= ims_scale[im_idx]
clip_tiled_boxes(boxes, raw_image.shape)
scores_wide.append(batch_scores[indices])
boxes_wide.append(boxes)
return (
np.vstack(scores) if len(ims) > 0 else scores[0],
np.vstack(boxes) if len(ims) > 0 else boxes[0],
)
return (np.vstack(scores_wide), np.vstack(boxes_wide)) \
if len(scores_wide) > 1 else (scores_wide[0], boxes_wide[0])
def test_net(weights, num_classes, q_in, q_out, device):
num_classes, cfg.GPU_ID = num_classes, device
detector = new_detector(device, weights)
def test_net(detector, server):
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = time_util.new_timers('im_detect', 'misc')
_t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()}
while True:
idx, raw_image = q_in.get()
if raw_image is None:
break
for i in range(num_images):
image_id, raw_image = server.get_image()
boxes_this_image = [[]]
with _t['im_detect'].tic_and_toc():
scores, boxes = im_detect(detector, raw_image)
_t['misc'].tic()
boxes_this_image = [[]]
for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4]
cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
......@@ -119,43 +114,16 @@ def test_net(detector, server):
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_image,
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_id),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(i + 1, num_images,
_t['im_detect'].average_time,
_t['misc'].average_time),
end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections')
server.evaluate_detections(all_boxes)
q_out.put((
idx,
{
'im_detect': _t['im_detect'].average_time,
'misc': _t['misc'].average_time,
},
{
'boxes': boxes_this_image,
},
))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
from lib.core.config import cfg
def generate_grid_anchors(features, base_anchors, strides):
num_strides = len(strides)
if len(features) != num_strides:
raise ValueError(
'Given %d features for %d strides.'
% (len(features), num_strides)
)
# Generate proposals from shifted anchors
anchors_to_pack = []
for i in range(len(features)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * strides[i]
shift_y = np.arange(0, height) * strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
if num_strides > 1:
# Transpose from (K, A, 4) to (A, K, 4)
# We will pack it with other strides to
# match the data format of (N, C, H, W)
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
anchors_to_pack.append(anchors)
else:
# Original order of Faster R-CNN
return anchors.reshape((K * A, 4))
return np.vstack(anchors_to_pack)
def map_returns_to_blobs(returns, blobs, keys):
"""Map returns of image to blobs."""
for i, key in enumerate(keys):
blobs[key].append(returns[i])
def map_rois_to_levels(rois, k_min, k_max):
"""Map rois to fpn levels."""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
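# FPN paper heuristic: k = floor(k0 + log2(sqrt(w * h) / 224));
# the 1e-6 term guards against log2(0) for degenerate boxes.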
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
def map_blobs_to_outputs(blobs, defaults, lvl_inds):
"""Map blobs to outputs according to fpn indices."""
outputs = collections.defaultdict(list)
for inds in lvl_inds:
for key, blob in blobs.items():
outputs[key].append(
blob[inds]
if len(inds) > 0
else defaults[key]
)
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))
)
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors)
total_anchors += anchors.shape[0]
all_anchors = np.vstack(all_anchors)
# label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
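# Uniform 1 / RPN_BATCHSIZE outside weights average the loss over the sampled
# anchors; the inside weights above restrict the regression term to positives.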
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms import nms_wrapper
from lib.utils import logger
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Compute proposals by applying transformations anchors."""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))
)
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def generate_grid_anchors(self, features):
# Generate proposals from shifted anchors
anchors_wide = []
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
anchors_wide.append(anchors)
return np.vstack(anchors_wide)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
all_anchors = self.generate_grid_anchors(features) # [n, 4]
if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images:
logger.fatal('Incorrect num of images: {}'.format(num_images))
# Prepare for the outputs
batch_rois = []
batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4]
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; first partition to get the top K
# unsorted, then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. Remove predicted boxes with either height or width < threshold
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# 4. Apply nms (e.g. threshold = 0.7)
# 5. Take after_nms_topN (e.g. 300)
# 6. Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level)
all_rois = []
for i in range(k):
lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling
all_rois.append(array2tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else:
all_rois.append(array2tensor(rpn_rois[lv_indices]))
return all_rois
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
self.fake_outputs = {
'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32),
'labels': np.array([-1], dtype=np.float32),
'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
}
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
outputs = {key: [] for key in keys}
batch_outputs = {key: [] for key in keys}
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(k)]
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return {
'rois': [array2tensor(outputs['rois'][i]) for i in range(k)],
'labels': array2tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': array2tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': array2tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': array2tensor(np.vstack(outputs['bbox_outside_weights'])),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th).
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Sample a batch of RoIs comprising foreground and background examples."""
# overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
......@@ -13,6 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.fpn.anchor_target_layer import AnchorTargetLayer
from lib.fpn.proposal_layer import ProposalLayer
from lib.fpn.proposal_target_layer import ProposalTargetLayer
from lib.faster_rcnn.anchor_target import AnchorTarget
from lib.faster_rcnn.proposal import Proposal
from lib.mask_rcnn.data_loader import DataLoader
from lib.mask_rcnn.proposal_target import ProposalTarget
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.mask_rcnn.data_transformer import DataTransformer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import mask_list_to_blob
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : lambda
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
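# Q1 holds raw examples from the readers, Q21/Q22 hold transformed samples
# split by aspect ratio (see DataTransformer.run), and Q3 holds assembled batches.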
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
def produce(q_in):
processed_ims, ims_info = [], []
packed_boxes, packed_masks = [], []
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes, gt_masks = q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[:2]) + [im_scale])
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
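# Append the image index as the last column so gt boxes from different
# images can be dismantled again downstream (see dismantle_masks).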
packed_boxes.append(im_boxes)
packed_masks.append(gt_masks)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, 'float32'),
'gt_boxes': np.concatenate(packed_boxes, 0),
'gt_masks': mask_list_to_blob(packed_masks),
}
# Two queues implement aspect grouping, which avoids
# padding a mini-batch into a huge square blob and
# thus reduces GPU memory usage
q1, q2 = self.Q21, self.Q22
# Main prefetch loop
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
q1, q2 = q2, q1 # Uniform sampling trick
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.datasets.example import Example
from lib.pycocotools import mask_utils
from lib.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob
from lib.utils.image import get_image_with_target_size
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._seed = cfg.RNG_SEED
self._use_flipped = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF
self._classes = kwargs.get('classes', ('__background__',))
self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self.q_in = self.q1_out = self.q2_out = None
self.daemon = True
def make_roi_dict(self, example, im_scale, apply_flip=False):
objects, n_objects = example.objects, 0
height, width = example.height, example.width
if not self._use_diff:
for obj in objects:
if obj.get('difficult', 0) == 0:
n_objects += 1
else:
n_objects = len(objects)
roi_dict = {
'boxes': np.zeros((n_objects, 4), 'float32'),
'masks': np.empty((n_objects, height, width), 'uint8'),
'gt_classes': np.zeros((n_objects, 1), 'int32'),
'mask_flags': np.ones((n_objects, 1), 'float32'),
}
# Filter the difficult instances
object_idx = 0
for obj in objects:
if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue
bbox, mask = obj['bbox'], obj['mask']
roi_dict['boxes'][object_idx, :] = [
max(0, bbox[0]),
max(0, bbox[1]),
min(bbox[2], width - 1),
min(bbox[3], height - 1),
]
if mask is not None:
roi_dict['masks'][object_idx] = (
mask_utils.bytes2img(
obj['mask'],
height,
width,
))
else:
roi_dict['mask_flags'][object_idx] = 0.
roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']]
object_idx += 1
# Flip the boxes if necessary
if apply_flip:
roi_dict['boxes'] = \
box_util.flip_boxes(
roi_dict['boxes'],
width,
)
# Scale the boxes to the detection scale
roi_dict['boxes'] *= im_scale
return roi_dict
def get(self, example):
example = Example(example)
img = example.image
# Scale
max_size = cfg.TRAIN.MAX_SIZE
target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))]
img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
# Flip
apply_flip = False
if self._use_flipped:
if np.random.randint(2) > 0:
img = img[:, ::-1]
apply_flip = True
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls, flag}]
gt_boxes = \
np.concatenate([
roi_dict['boxes'],
roi_dict['gt_classes'],
roi_dict['mask_flags']
], axis=1)
# Post-Process for gt masks
# Shape like: [num_objects, im_h, im_w]
if gt_boxes.shape[0] > 0:
gt_masks = roi_dict['masks']
if apply_flip:
gt_masks = gt_masks[:, :, ::-1]
else:
gt_masks = None
return img, im_scale, gt_boxes, gt_masks
def run(self):
# Fix the process-local random seed
np.random.seed(self._seed)
# Main prefetch loop
while True:
outputs = self.get(self.q_in.get())
if len(outputs[2]) < 1:
continue # Ignore the non-object image
aspect_ratio = float(outputs[0].shape[0]) / outputs[0].shape[1]
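# Route portrait (h > w) and landscape samples into separate queues so that
# each mini-batch groups images with similar aspect ratios.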
if aspect_ratio > 1.:
self.q1_out.put(outputs)
else:
self.q2_out.put(outputs)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_returns_to_blobs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.utils import boxes as box_util
from lib.utils import mask as mask_util
from lib.utils.framework import new_tensor
class ProposalTarget(object):
"""Assign proposals to ground-truth targets."""
def __init__(self):
super(ProposalTarget, self).__init__()
self.resolution = cfg.MRCNN.RESOLUTION
self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('mask_targets', -np.ones((1, self.resolution, self.resolution), 'float32')),
])
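# A mask target of -1 marks entries ignored by the mask loss; like the other
# defaults, it also pads FPN levels that receive no sampled RoIs.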
def __call__(self, rpn_rois, gt_boxes, gt_masks, ims_info):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
# GT masks (num_objects, im_h, im_w)
gt_boxes_wide, gt_masks_wide = \
mask_util.dismantle_masks(
gt_boxes,
gt_masks,
num_images,
)
# Prepare for the outputs
keys = self.defaults.keys()
blobs = {key: [] for key in keys}
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
gt_masks = gt_masks_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype('int32') == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, :4]))))
# Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs(
sample_rois(
rois,
gt_boxes,
gt_masks,
rois_per_image,
fg_rois_per_image,
self.num_classes,
ims_info[ix][2],
), blobs, keys,
)
# Stack into contiguous blobs
for k, v in blobs.items():
blobs[k] = np.concatenate(blobs[k], 0)
# Distribute rois into pyramids
k_min = cfg.FPN.ROI_MIN_LEVEL
k_max = cfg.FPN.ROI_MAX_LEVEL
k = k_max - k_min + 1
levels = map_rois_to_levels(blobs['rois'], k_min, k_max)
outputs = \
map_blobs_to_outputs(
blobs,
self.defaults,
[np.where(levels == (i + k_min))[0] for i in range(k)],
)
# Select only the foreground RoIs for the mask branch
for i in range(k):
inds = np.where(outputs['labels'][i] > 0)[0]
inds = inds if len(inds) > 0 else np.array([0], 'int64')
outputs['mask_rois'].append(outputs['rois'][i][inds])
outputs['mask_targets'][i] = outputs['mask_targets'][i][inds]
outputs['mask_labels'].append(outputs['labels'][i][inds].astype('int64') - 1)
# Use the sparse indices to select logits
# Reduce the overhead of feeding dense class-specific targets
mask_labels = np.concatenate(outputs['mask_labels'], 0)
mask_indices = np.arange(len(mask_labels)) * (self.num_classes - 1)
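# Adding mask_labels to these strided offsets flattens (roi, class) pairs into
# indices over the (num_fg_rois, num_classes - 1) mask logits, so only the
# matched class channel of each foreground RoI is supervised.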
return {
'rois': [new_tensor(outputs['rois'][i]) for i in range(k)],
'labels': new_tensor(np.concatenate(outputs['labels'], 0)),
'bbox_targets': new_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': new_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': new_tensor(np.vstack(outputs['bbox_outside_weights'])),
'mask_rois': [new_tensor(outputs['mask_rois'][i]) for i in range(k)],
'mask_targets': new_tensor(np.vstack(outputs['mask_targets'])),
'mask_indices': new_tensor(mask_indices + mask_labels),
}
def get_targets(
ex_rois,
gt_rois,
gt_labels,
gt_masks,
mask_flags,
mask_size,
num_classes,
im_scale,
):
"""Compute the bounding-box regression targets."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0]
targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = np.zeros((ex_rois.shape[0], 4 * num_classes), 'float32')
inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
for i in fg_inds:
start = int(4 * gt_labels[i])
bbox_targets[i, start:start + 4] = targets[i]
inside_weights[i, start:start + 4] = (1., 1., 1., 1.)
outside_weights = np.array(inside_weights > 0).astype('float32')
# Compute mask classification targets
mask_shape = [mask_size] * 2
ex_rois_ori = np.round(ex_rois / im_scale).astype(int)
gt_rois_ori = np.round(gt_rois / im_scale).astype(int)
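# Ground-truth masks are stored at the original image resolution, so project
# the RoIs back by the inverse image scale before cropping mask targets.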
mask_targets = -np.ones([len(gt_labels)] + mask_shape, 'float32')
for i in fg_inds:
if mask_flags[i] > 0:
box_mask = \
mask_util.intersect_box_mask(
ex_rois_ori[i],
gt_rois_ori[i],
gt_masks[i],
)
if box_mask is not None:
mask_targets[i] = \
mask_util.resize_mask(
mask=box_mask,
size=mask_shape,
)
return bbox_targets, inside_weights, outside_weights, mask_targets
def sample_rois(
all_rois,
gt_boxes,
gt_masks,
num_rois,
num_fg_rois,
num_classes,
im_scale,
):
"""Sample a batch of RoIs comprising foreground and background examples."""
overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image
bg_rois_per_this_image = num_rois - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, bg_rois_per_this_image, False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays
rois, labels = all_rois[keep_inds], labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
# Clamp the image indices for the background RoIs to -1
rois[fg_rois_per_this_image:, 0] = -1
# Compute the target from RoIs
outputs = [rois, labels]
outputs += get_targets(
rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4],
labels,
gt_masks[gt_assignment[fg_inds]],
gt_boxes[gt_assignment[fg_inds], 5],
cfg.MRCNN.RESOLUTION,
num_classes,
im_scale,
)
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn import map_rois_to_levels
from lib.faster_rcnn import map_blobs_to_outputs
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper
from lib.utils import framework
from lib.utils import time_util
from lib.utils import boxes as box_util
from lib.utils.blob import im_list_to_blob
from lib.utils.image import scale_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale
], dtype=np.float32)
# Do Forward
if not hasattr(detector, 'graph'):
with framework.new_workspace().as_default():
data = torch.from_numpy(blobs['data'])
ims_info = torch.from_numpy(blobs['ims_info'])
with torch.no_grad():
with torch.jit.Tracer(retain_ops=True):
inputs = {'data': data, 'ims_info': ims_info}
outputs = detector.forward(inputs)
detector.graph = \
framework.Graph(inputs, {
'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']
})
outputs = detector.graph(**blobs)
# Decode results
rois = outputs['rois']
scores, boxes, batch_inds = [], [], []
pred_boxes = \
box_util.bbox_transform_inv(
rois[:, 1:5],
outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS,
)
for i in range(len(ims)):
inds = np.where(rois[:, 0].astype(np.int32) == i)[0]
im_boxes = pred_boxes[inds] / ims_scale[i]
scores.append(outputs['cls_prob'][inds])
boxes.append(box_util.clip_tiled_boxes(im_boxes, raw_image.shape))
batch_inds.append(np.ones((len(inds), 1), 'int32') * i)
return (
np.vstack(scores) if len(ims) > 0 else scores[0],
np.vstack(boxes) if len(ims) > 0 else boxes[0],
np.vstack(batch_inds) if len(ims) > 0 else batch_inds[0],
np.array(ims_scale, 'float64'),
)
def mask_detect(detector, rois):
k_min = cfg.FPN.ROI_MIN_LEVEL
k_max = cfg.FPN.ROI_MAX_LEVEL
k = k_max - k_min + 1
levels = map_rois_to_levels(rois, k_min, k_max)
level_inds = [np.where(levels == (i + k_min))[0] for i in range(k)]
fpn_rois = map_blobs_to_outputs(
{'rois': rois[:, :5]},
{'rois': np.array([[-1, 0, 0, 1, 1]], 'float32')},
level_inds)['rois']
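# Pad empty pyramid levels with a fake RoI so every level feeds RoIAlign a
# non-empty blob; these fake entries are skipped when building mask_inds below.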
workspace = detector.graph.workspace
placeholders = detector.graph.placeholders
score_fn = detector.rcnn.compute_mask_score
with workspace.as_default():
if 'rois' not in placeholders:
placeholders['rois'] = \
[framework.new_placeholder(cfg.GPU_ID) for _ in range(k)]
placeholders['mask_inds'] = \
framework.new_placeholder(cfg.GPU_ID)
for i, v in enumerate(fpn_rois):
framework.feed_tensor(placeholders['rois'][i], v.astype('float32'))
with torch.no_grad():
mask_score = score_fn(rois=placeholders['rois'])
nc, i = mask_score.shape[1], 0
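# mask_score is laid out as (num_rois, num_classes - 1, M, M); build flattened
# (roi * nc + class) indices so index_select over axes (0, 1) picks the
# class-specific mask for each RoI.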
mask_inds = {}
for inds in level_inds:
for idx in inds:
cls = int(rois[idx, 5])
mask_inds[idx] = (i * nc + cls)
i += 1
if len(inds) == 0:
i += 1
mask_inds = list(map(mask_inds.get, sorted(mask_inds)))
framework.feed_tensor(
placeholders['mask_inds'],
np.array(mask_inds, 'int64'),
)
with torch.no_grad():
mask_pred = mask_score.index_select(
(0, 1), placeholders['mask_inds'])
return detector.rcnn.sigmoid(mask_pred).numpy(True).copy()
def test_net(weights, num_classes, q_in, q_out, device):
num_classes, cfg.GPU_ID = num_classes, device
detector = new_detector(device, weights)
_t = time_util.new_timers('im_detect', 'mask_detect', 'misc')
while True:
idx, raw_image = q_in.get()
if raw_image is None:
break
rois_this_image = []
boxes_this_image = [[]]
masks_this_image = [[]]
with _t['im_detect'].tic_and_toc():
scores, boxes, batch_inds, ims_scale = \
im_detect(detector, raw_image)
_t['misc'].tic()
for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
cls_batch_inds = batch_inds[inds]
cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = nms_wrapper.soft_nms(
cls_detections,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms_wrapper.nms(
cls_detections,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
cls_batch_inds = cls_batch_inds[keep]
boxes_this_image.append(cls_detections)
rois_this_image.append(
np.hstack((
cls_batch_inds,
cls_detections[:, :4] * ims_scale[cls_batch_inds],
np.ones((len(keep), 1)) * (j - 1),
)))
mask_rois = np.concatenate(rois_this_image)
_t['misc'].toc()
if len(mask_rois) > 0:
k = 0
_t['mask_detect'].tic()
mask_pred = mask_detect(detector, mask_rois)
for j in range(1, num_classes):
num_pred = len(boxes_this_image[j])
cls_masks = mask_pred[k:k + num_pred]
masks_this_image.append(cls_masks)
k += num_pred
_t['mask_detect'].toc()
q_out.put((
idx,
{
'im_detect': _t['im_detect'].average_time,
'mask_detect': _t['mask_detect'].average_time,
'misc': _t['misc'].average_time,
},
{
'boxes': boxes_this_image,
'masks': masks_this_image,
},
))
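# A hedged sketch of how a driver could feed `test_net` workers through the
# queues; the harness below (names, poison pills, result ordering) is an
# assumption, not the repository's actual test runner.
import multiprocessing as mp

def run_test_workers(weights, num_classes, images, devices):
    q_in, q_out = mp.Queue(), mp.Queue()
    workers = [
        mp.Process(target=test_net, args=(weights, num_classes, q_in, q_out, dev))
        for dev in devices
    ]
    for p in workers:
        p.start()
    for idx, img in enumerate(images):
        q_in.put((idx, img))
    for _ in devices:
        q_in.put((-1, None))                  # poison pill stops each worker loop
    results = [q_out.get() for _ in range(len(images))]
    for p in workers:
        p.join()
    return sorted(results, key=lambda r: r[0])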
......@@ -14,12 +14,9 @@ from __future__ import division
from __future__ import print_function
# Import custom modules
from lib.modeling.base import affine
from lib.modeling.base import bn
from lib.modeling.base import conv1x1
from lib.modeling.base import conv3x3
from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN
from lib.modeling.mask_rcnn import MaskRCNN
from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD
......@@ -15,20 +15,19 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
from lib.modules import init
from lib.modules import nn
class WideResBlock(torch.nn.Module):
class WideResBlock(nn.Module):
def __init__(self, dim_in, dim_out, stride=1, downsample=None):
super(WideResBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out)
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.Affine(dim_out)
self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.Affine(dim_out)
self.downsample = downsample
self.relu = torch.nn.ReLU(inplace=True)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
residual = x
......@@ -48,20 +47,20 @@ class WideResBlock(torch.nn.Module):
return out
class InceptionBlock(torch.nn.Module):
class InceptionBlock(nn.Module):
def __init__(self, dim_in, dim_out):
super(InceptionBlock, self).__init__()
self.conv1 = conv1x1(dim_in, dim_out)
self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out // 2)
self.bn2 = affine(dim_out // 2)
self.conv3a = conv3x3(dim_out // 2, dim_out)
self.bn3a = affine(dim_out)
self.conv3b = conv3x3(dim_out, dim_out)
self.bn3b = affine(dim_out)
self.conv4 = conv3x3(dim_out * 3, dim_out)
self.bn4 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.conv1 = nn.Conv1x1(dim_in, dim_out)
self.bn1 = nn.Affine(dim_out)
self.conv2 = nn.Conv3x3(dim_out, dim_out // 2)
self.bn2 = nn.Affine(dim_out // 2)
self.conv3a = nn.Conv3x3(dim_out // 2, dim_out)
self.bn3a = nn.Affine(dim_out)
self.conv3b = nn.Conv3x3(dim_out, dim_out)
self.bn3b = nn.Affine(dim_out)
self.conv4 = nn.Conv3x3(dim_out * 3, dim_out)
self.bn4 = nn.Affine(dim_out)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
residual = x
......@@ -82,7 +81,7 @@ class InceptionBlock(torch.nn.Module):
out_3x3_b = self.bn3b(out)
out_3x3_b = self.relu(out_3x3_b)
out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], dim=1)
out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], 1)
out = self.conv4(out)
out = self.bn4(out)
......@@ -91,22 +90,22 @@ class InceptionBlock(torch.nn.Module):
return out
class AirNet(torch.nn.Module):
class AirNet(nn.Module):
def __init__(self, blocks, num_stages):
super(AirNet, self).__init__()
self.dim_in, filters = 64, [64, 128, 256, 384]
self.feature_dims = [None, None] + \
filters[1:num_stages - 1]
self.conv1 = torch.nn.Conv2d(
self.conv1 = nn.Conv2d(
3, 64,
kernel_size=7,
stride=2,
padding=3,
bias=False,
)
self.bn1 = affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
self.bn1 = nn.Affine(self.dim_in)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(
kernel_size=2,
stride=2,
padding=0,
......@@ -121,19 +120,14 @@ class AirNet(torch.nn.Module):
self.reset_parameters()
def reset_parameters(self):
# The Kaiming Initialization
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
# Fix the gain for [-127, 127]
a=1,
) # Xavier Initialization
if isinstance(m, nn.Conv2d):
init.xaiver(m.weight)
def make_blocks(self, dim_out, blocks, stride=1):
downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out),
downsample = nn.Sequential(
nn.Conv1x1(self.dim_in, dim_out, stride=stride),
nn.Affine(dim_out),
)
layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
self.dim_in = dim_out
......@@ -144,7 +138,7 @@ class AirNet(torch.nn.Module):
layers.append(InceptionBlock(dim_out, dim_out))
else:
raise ValueError('Unknown block flag: ' + blocks[i])
return torch.nn.Sequential(*layers)
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def conv1x1(dim_in, dim_out, stride=1, bias=False):
"""1x1 convolution."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
def conv3x3(dim_in, dim_out, stride=1, bias=False):
"""3x3 convolution with padding."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=3,
stride=stride,
padding=1,
bias=bias,
)
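# A minimal usage sketch (an assumption, mirroring how the backbones compose
# these helpers): a 3x3 convolution followed by the frozen affine transform
# and a ReLU.
import dragon.vm.torch as torch

def conv_bn_relu(dim_in, dim_out, stride=1):
    return torch.nn.Sequential(
        conv3x3(dim_in, dim_out, stride=stride),
        affine(dim_out),
        torch.nn.ReLU(inplace=True),
    )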
......@@ -21,14 +21,16 @@ from lib.core.config import cfg
from lib.modeling import FPN
from lib.modeling import RPN
from lib.modeling import FastRCNN
from lib.modeling import MaskRCNN
from lib.modeling import RetinaNet
from lib.modeling import SSD
from lib.modeling.factory import get_body_func
from lib.ops.modules import Bootstrap
from lib.utils.logger import is_root
from lib.modules import nn
from lib.modules import vision
from lib.utils import logger
class Detector(torch.nn.Module):
class Detector(nn.Module):
"""Organize the detection pipelines.
A bunch of classic algorithms are integrated, see the
......@@ -42,19 +44,20 @@ class Detector(torch.nn.Module):
backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:]
# + Data Loader
self.data_layer = importlib.import_module(
'lib.{}'.format(model)).DataLayer
self.bootstrap = Bootstrap()
# + DataLoader
self.data_loader_cls = importlib.import_module(
'lib.{}'.format(model)).DataLoader
self.bootstrap = vision.Bootstrap()
# + Feature Extractor
# + FeatureExtractor
self.body = get_body_func(body)()
feature_dims = self.body.feature_dims
# + Feature Enhancer
# + FeatureEnhancer
if 'fpn' in modules:
self.fpn = FPN(feature_dims)
feature_dims = self.fpn.feature_dims
elif 'mbox' in modules:
pass # Placeholder
else:
......@@ -63,7 +66,10 @@ class Detector(torch.nn.Module):
# + Detection Modules
if 'rcnn' in model:
self.rpn = RPN(feature_dims[0])
self.fast_rcnn = FastRCNN(feature_dims[0])
if 'faster' in model:
self.rcnn = FastRCNN(feature_dims[0])
elif 'mask' in model:
self.rcnn = MaskRCNN(feature_dims[0])
if 'retinanet' in model:
self.retinanet = RetinaNet(feature_dims[0])
......@@ -85,7 +91,7 @@ class Detector(torch.nn.Module):
self.load_state_dict(
torch.load(weights),
strict=False,
verbose=is_root(),
verbose=logger.is_root(),
)
def forward(self, inputs=None):
......@@ -107,7 +113,7 @@ class Detector(torch.nn.Module):
# 1) Training: <= DataLayer
# 2) Inference: <= Given
if not hasattr(self, 'data_loader'):
self.data_loader = self.data_layer()
self.data_loader = self.data_loader_cls()
inputs = self.data_loader()
# 1. Extract features
......@@ -126,7 +132,7 @@ class Detector(torch.nn.Module):
# 3. Collect detection outputs
outputs = collections.OrderedDict()
# 3.1 Feature -> RPN -> Fast R-CNN
# 3.1 Feature -> RPN -> R-CNN
if hasattr(self, 'rpn'):
outputs.update(
self.rpn(
......@@ -135,7 +141,7 @@ class Detector(torch.nn.Module):
)
)
outputs.update(
self.fast_rcnn(
self.rcnn(
features=features,
rpn_cls_score=outputs['rpn_cls_score'],
rpn_bbox_pred=outputs['rpn_bbox_pred'],
......@@ -174,8 +180,8 @@ class Detector(torch.nn.Module):
##################################
last_module = None
for e in self.modules():
if isinstance(e, torch.nn.Affine) and \
isinstance(last_module, torch.nn.Conv2d):
if isinstance(e, nn.Affine) and \
isinstance(last_module, nn.Conv2d):
if last_module.bias is None:
delattr(last_module, 'bias')
e.forward = lambda x: x
......@@ -188,8 +194,8 @@ class Detector(torch.nn.Module):
######################################
last_module = None
for e in self.modules():
if isinstance(e, torch.nn.BatchNorm2d) and \
isinstance(last_module, torch.nn.Conv2d):
if isinstance(e, nn.BatchNorm2d) and \
nn.is_conv2d(last_module):
if last_module.bias is None:
delattr(last_module, 'bias')
e.forward = lambda x: x
......@@ -204,3 +210,17 @@ class Detector(torch.nn.Module):
else:
last_module.weight.data.mul_(term)
last_module = e
def new_detector(device, weights=None, training=False):
detector = Detector().cuda(device)
if weights is not None:
detector.load_weights(weights)
if not training:
detector.eval()
detector.optimize_for_inference()
# Enable the fp16 inference support if necessary
# Boost a little if TensorCore is available
if cfg.MODEL.PRECISION.lower() == 'float16':
detector.half()
return detector
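# Hypothetical usage (the weights path is a placeholder, not from this commit):
# build an inference detector on one GPU and reuse it across images, as
# `test_net` does via `new_detector`.
#
#   detector = new_detector(device=0, weights='/path/to/model_final.pth')
#   scores, boxes, batch_inds, ims_scale = im_detect(detector, raw_image)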
......@@ -43,14 +43,20 @@ for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D)
# MobileNet
for D in ['a1', 'v2']:
_STORE['BODY']['mobilenet_{}'.format(D)] = \
'lib.modeling.mobilenet.make_mobilenet_{}'.format(D)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys())))
'Registered modules: [{}]'
.format(name, desc, ', '.join(sets.keys()))
)
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
......
......@@ -14,13 +14,19 @@ from __future__ import division
from __future__ import print_function
import collections
import functools
import dragon.vm.torch as torch
from lib import faster_rcnn
from lib.core.config import cfg
from lib.ops.modules import RPNDecoder
from lib.modules import det
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
class FastRCNN(torch.nn.Module):
class FastRCNN(nn.Module):
"""Generate proposal regions for R-CNN series.
The pipeline is as follows:
......@@ -32,59 +38,45 @@ class FastRCNN(torch.nn.Module):
"""
def __init__(self, dim_in=256):
super(FastRCNN, self).__init__()
if len(cfg.RPN.STRIDES) > 1:
# RPN with multiple strides(i.e. FPN)
from lib.fpn import ProposalLayer, ProposalTargetLayer
else:
# RPN with single stride(i.e. C4)
from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.rpn_decoder = RPNDecoder()
self.proposal_layer = ProposalLayer()
self.proposal_target_layer = ProposalTargetLayer()
self.softmax = torch.nn.Softmax(dim=1)
self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = {
'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size')
# Compute spatial scales for multiple strides
roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels]
self.fc6 = nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.rpn_decoder = det.RPNDecoder()
self.proposal = faster_rcnn.Proposal()
self.proposal_target = faster_rcnn.ProposalTarget()
self.softmax = nn.Softmax(dim=1)
self.relu = nn.ReLU(inplace=True)
self.sigmoid = nn.Sigmoid()
self.box_roi_feature = functools.partial({
'RoIPool': vision.roi_pool,
'RoIAlign': vision.roi_align
}[cfg.FRCNN.ROI_XFORM_METHOD], size=cfg.FRCNN.ROI_XFORM_RESOLUTION)
self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = nn.SmoothL1Loss()
# Compute spatial scales according to strides
self.spatial_scales = [
1. / (2 ** lvl)
for lvl in range(
cfg.FPN.ROI_MIN_LEVEL,
cfg.FPN.ROI_MAX_LEVEL + 1
)]
self.reset_parameters()
def reset_parameters(self):
# Careful initialization for Fast R-CNN
torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
init.normal(self.cls_score.weight, std=0.01)
init.normal(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters():
if 'bias' in name:
torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func(
feature, rois,
output_size=(
cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION,
),
spatial_scale=spatial_scale,
)
init.constant(p, 0)
def forward(self, **kwargs):
# Generate Proposals
# Apply the CXX implementation during inference
proposal_func = self.proposal_layer \
# Generate proposals
proposal_func = self.proposal \
if self.training else self.rpn_decoder
self.rcnn_data = {
self.data = {
'rois': proposal_func(
kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data),
......@@ -93,66 +85,61 @@ class FastRCNN(torch.nn.Module):
)
}
# Generate Targets from Proposals
# Generate targets from proposals
if self.training:
self.rcnn_data.update(
self.proposal_target_layer(
rpn_rois=self.rcnn_data['rois'],
self.data.update(
self.proposal_target(
rpn_rois=self.data['rois'],
gt_boxes=kwargs['gt_boxes'],
)
)
# Transform RoI Feature
roi_features = []
if len(self.rcnn_data['rois']) > 1:
for i, spatial_scale in enumerate(self.spatial_scales):
roi_features.append(
self.RoIFeatureTransform(
# Transform RoI features
if len(self.data['rois']) > 1:
roi_features = \
torch.cat([
self.box_roi_feature(
kwargs['features'][i],
self.rcnn_data['rois'][i],
self.data['rois'][i],
spatial_scale,
)
)
roi_features = torch.cat(roi_features, dim=0)
) for i, spatial_scale in enumerate(self.spatial_scales)
], dim=0)
else:
spatial_scale = 1.0 / cfg.RPN.STRIDES[0]
roi_features = \
self.RoIFeatureTransform(
self.box_roi_feature(
kwargs['features'][0],
self.rcnn_data['rois'][0],
spatial_scale,
self.data['rois'][0],
1. / cfg.RPN.STRIDES[0],
)
# Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim)
rcnn_output = self.relu(self.fc6(roi_features))
rcnn_output = self.relu(self.fc7(rcnn_output))
roi_features = self.relu(self.fc6(roi_features))
roi_features = self.relu(self.fc7(roi_features))
# Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float()
outputs = collections.OrderedDict([
('bbox_pred', self.bbox_pred(rcnn_output).float()),
])
# Compute logits and losses
outputs = collections.OrderedDict()
cls_score = self.cls_score(roi_features).float()
outputs['bbox_pred'] = self.bbox_pred(roi_features).float()
if self.training:
# Compute rcnn losses
outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.rcnn_data['labels'])),
cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss(
outputs['bbox_pred'],
self.rcnn_data['bbox_targets'],
self.rcnn_data['bbox_inside_weights'],
self.rcnn_data['bbox_outside_weights'],
self.data['bbox_targets'],
self.data['bbox_inside_weights'],
self.data['bbox_outside_weights'],
)),
]))
else:
            # Return the RoIs to decode the refined boxes
if len(self.rcnn_data['rois']) > 1:
outputs['rois'] = torch.cat(
self.rcnn_data['rois'], dim=0)
if len(self.data['rois']) > 1:
outputs['rois'] = torch.cat(self.data['rois'], 0)
else:
outputs['rois'] = self.rcnn_data['rois'][0]
outputs['rois'] = self.data['rois'][0]
# Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score)
......
......@@ -16,43 +16,41 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1
from lib.modeling import conv3x3
from lib.modules import init
from lib.modules import nn
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
class FPN(torch.nn.Module):
class FPN(nn.Module):
"""Feature Pyramid Networks for R-CNN and RetinaNet."""
def __init__(self, feature_dims):
super(FPN, self).__init__()
self.C = torch.nn.ModuleList()
self.P = torch.nn.ModuleList()
dim = cfg.FPN.DIM
self.C = nn.ModuleList()
self.P = nn.ModuleList()
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True))
self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True))
self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True))
self.P.append(nn.Conv3x3(dim, dim, bias=True))
if 'rcnn' in cfg.MODEL.TYPE:
self.apply_func = self.apply_on_rcnn
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
self.maxpool = nn.MaxPool2d(1, 2, ceil_mode=True)
else:
self.apply_func = self.apply_on_generic
self.relu = torch.nn.ReLU(inplace=False)
self.relu = nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM
self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True))
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim
self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True))
self.feature_dims = [dim]
self.reset_parameters()
self.feature_dims = [cfg.FPN.DIM]
def reset_parameters(self):
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
a=1, # Fix the gain for [-127, 127]
) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0)
if isinstance(m, nn.Conv2d):
init.xaiver(m.weight)
init.constant(m.bias, 0)
def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1])
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import functools
import dragon.vm.torch as torch
from lib import mask_rcnn
from lib.core.config import cfg
from lib.modules import det
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
class MaskRCNN(nn.Module):
def __init__(self, dim_in=256):
"""Generate mask regions for R-CNN series.
The pipeline is as follows:
        ... -> BoxRoIs  \                      /-> cls_score -> cls_loss
                         -> RoIFeatureXform -> MLP
        ... -> Features /                      \-> bbox_pred -> bbox_loss

        ... -> MaskRoIs \
                         -> RoIFeatureXform -> FCN -> mask_score -> mask_loss
        ... -> Features /

"""
super(MaskRCNN, self).__init__()
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.fcn = nn.ModuleList([nn.Conv3x3(dim_in, dim_in, bias=True) for _ in range(4)])
self.fcn += [nn.ConvTranspose2d(dim_in, dim_in, 2, 2, 0)]
self.cls_score = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.mask_score = nn.Conv1x1(dim_in, cfg.MODEL.NUM_CLASSES - 1, bias=True)
self.rpn_decoder = det.RPNDecoder()
self.proposal = mask_rcnn.Proposal()
self.proposal_target = mask_rcnn.ProposalTarget()
self.sigmoid = nn.Sigmoid()
self.softmax = nn.Softmax(dim=1)
self.relu = nn.ReLU(True)
self.box_roi_feature = functools.partial({
'RoIPool': vision.roi_pool,
'RoIAlign': vision.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD], size=cfg.FRCNN.ROI_XFORM_RESOLUTION)
self.mask_roi_feature = functools.partial({
'RoIPool': vision.roi_pool,
'RoIAlign': vision.roi_align,
}[cfg.MRCNN.ROI_XFORM_METHOD], size=cfg.MRCNN.ROI_XFORM_RESOLUTION)
self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = nn.SmoothL1Loss()
self.mask_loss = nn.BCEWithLogitsLoss()
# Compute spatial scales according to strides
self.spatial_scales = [
1. / (2 ** lvl)
for lvl in range(
cfg.FPN.ROI_MIN_LEVEL,
cfg.FPN.ROI_MAX_LEVEL + 1
)]
self.reset_parameters()
def reset_parameters(self):
# Careful initialization for Fast R-CNN
init.normal(self.cls_score.weight, std=0.01)
init.normal(self.bbox_pred.weight, std=0.001)
# Careful initialization for Mask R-CNN
init.normal(self.mask_score.weight, std=0.001)
for m in self.fcn.modules():
if hasattr(m, 'weight'):
init.kaiming_normal(m.weight)
for name, p in self.named_parameters():
if 'bias' in name:
init.constant(p, 0)
def get_mask_score(self, features, rois):
roi_features = \
torch.cat([
self.mask_roi_feature(
features[i], rois[i], spatial_scale,
) for i, spatial_scale in enumerate(self.spatial_scales)
], dim=0)
for i in range(len(self.fcn)):
roi_features = self.relu(self.fcn[i](roi_features))
return self.mask_score(roi_features).float()
def forward(self, **kwargs):
# Generate proposals
proposal_func = self.proposal \
if self.training else self.rpn_decoder
self.data = {
'rois': proposal_func(
kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data),
kwargs['rpn_bbox_pred'],
kwargs['ims_info'],
)
}
# Generate targets from proposals
if self.training:
self.data.update(
self.proposal_target(
rpn_rois=self.data['rois'],
gt_boxes=kwargs['gt_boxes'],
gt_masks=kwargs['gt_masks'],
ims_info=kwargs['ims_info'],
)
)
# Transform RoI features
roi_features = \
torch.cat([
self.box_roi_feature(
kwargs['features'][i],
self.data['rois'][i],
spatial_scale,
) for i, spatial_scale in enumerate(self.spatial_scales)
], dim=0)
# Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim)
roi_features = self.relu(self.fc6(roi_features))
roi_features = self.relu(self.fc7(roi_features))
# Compute logits and losses
outputs = collections.OrderedDict()
cls_score = self.cls_score(roi_features).float()
outputs['bbox_pred'] = self.bbox_pred(roi_features).float()
if self.training:
# Compute the loss of bbox branch
outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss(
outputs['bbox_pred'],
self.data['bbox_targets'],
self.data['bbox_inside_weights'],
self.data['bbox_outside_weights'],
)),
]))
# Compute the loss of mask branch
mask_score = self.get_mask_score(
kwargs['features'], self.data['mask_rois'])
mask_score = mask_score.index_select(
(0, 1), self.data['mask_indices'])
outputs['mask_loss'] = self.mask_loss(
mask_score, self.data['mask_targets'])
else:
            # Return the RoIs to decode the refined boxes
if len(self.data['rois']) > 1:
outputs['rois'] = torch.cat(self.data['rois'], 0)
else:
outputs['rois'] = self.data['rois'][0]
# Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score)
            # Set a callback to decode masks from the refined RoIs
self.compute_mask_score = \
functools.partial(
self.get_mask_score,
features=kwargs['features'],
)
return outputs
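# A hedged sketch (not part of this commit) of pasting one 28x28 mask
# probability map back onto the original image; the resize-and-threshold
# scheme mirrors cfg.TEST.BINARY_THRESH but is an assumption about the
# post-processing, not code from this repository.
import cv2
import numpy as np

def paste_mask(mask_prob, box, im_h, im_w, thresh=0.5):
    # mask_prob: (M, M) sigmoid outputs; box: (x1, y1, x2, y2) in image pixels
    x1 = min(max(int(np.floor(box[0])), 0), im_w - 1)
    y1 = min(max(int(np.floor(box[1])), 0), im_h - 1)
    x2 = min(max(int(np.ceil(box[2])), x1), im_w - 1)
    y2 = min(max(int(np.ceil(box[3])), y1), im_h - 1)
    w, h = x2 - x1 + 1, y2 - y1 + 1
    mask = cv2.resize(mask_prob.astype(np.float32), (w, h))
    full = np.zeros((im_h, im_w), dtype=np.uint8)
    full[y1:y1 + h, x1:x1 + w] = (mask >= thresh).astype(np.uint8)
    return full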
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
def conv_triplet(dim_in, dim_out):
"""1x1 convolution + BN + ReLU."""
return [
nn.Conv2d(dim_in, dim_out, 1, bias=False),
nn.Affine(dim_out),
nn.ReLU(True),
]
def conv_quintet(dim_in, dim_out, ks, stride):
"""KxK convolution + BN + ReLU."""
return [
nn.DepthwiseConv2d(
dim_in, dim_in,
kernel_size=ks,
stride=stride,
padding=ks // 2,
bias=False,
),
nn.Affine(dim_in),
nn.ReLU(True),
nn.Conv1x1(dim_in, dim_out),
nn.Affine(dim_out),
]
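# Quick illustration (assumption, for intuition only) of why the depthwise KxK
# + pointwise 1x1 pair above is cheaper than a dense KxK convolution.
def conv_weight_count(dim_in, dim_out, ks, separable):
    if separable:
        return dim_in * ks * ks + dim_in * dim_out   # depthwise KxK + pointwise 1x1
    return dim_in * dim_out * ks * ks                # dense KxK

print(conv_weight_count(32, 64, 3, True), conv_weight_count(32, 64, 3, False))  # 2336 vs 18432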
class Setting(object):
V2 = (
[2, 3, 4, 3, 3, 1],
[2, 2, 2, 1, 2, 1],
[32, 16, 24, 32, 64, 96, 160, 320, 1280],
)
PROXYLESS_MOBILE = (
[4, 4, 4, 4, 4, 1],
[2, 2, 2, 1, 2, 1],
[32, 16, 32, 40, 80, 96, 192, 320, 1280],
)
PROXYLESS_GPU = (
[4, 4, 4, 4, 4, 1],
[2, 2, 2, 1, 2, 1],
[40, 24, 32, 56, 112, 128, 256, 432, 1280],
)
def Stem(dim_out, stride=1):
return torch.nn.Sequential(
torch.nn.Conv2d(
3, dim_out,
kernel_size=3,
stride=stride,
padding=1,
bias=False,
),
nn.Affine(dim_out),
nn.ReLU(True),
)
class Choice(nn.Module):
def __init__(self, dim_in, dim_out, mb=3, ks=3, stride=1):
super(Choice, self).__init__()
self.mb = mb
dim_hidden = int(round(dim_in * mb))
seq = conv_triplet(dim_in, dim_hidden) if mb != 1 else []
seq += conv_quintet(dim_hidden, dim_out, ks, stride)
self.conv = nn.ModuleList(seq)
self.stride = stride
self.apply_residual = stride == 1 and dim_in == dim_out
def forward(self, x):
residual = x if self.apply_residual else None
for i in range(3):
x = self.conv[i](x)
y = x if self.stride == 2 else None
for i in range(3, len(self.conv)):
x = self.conv[i](x)
if self.apply_residual:
return residual + x, y
else:
return x, y
class NASMobileNet(nn.Module):
def __init__(self, choices, preset=Setting.PROXYLESS_MOBILE):
super(NASMobileNet, self).__init__()
# Pre-defined blocks
def select_block(choice):
return {
0: functools.partial(Choice, mb=3, ks=3),
1: functools.partial(Choice, mb=6, ks=3),
2: functools.partial(Choice, mb=3, ks=5),
3: functools.partial(Choice, mb=6, ks=5),
4: functools.partial(Choice, mb=3, ks=7),
5: functools.partial(Choice, mb=6, ks=7),
6: nn.Identity,
}[choice]
        # Hand-crafted configurations
repeats, strides, out_channels = preset
names = ['2!', '3!', '4', '4!', '5', '5!']
self.num_layers = len(choices)
assert sum(repeats) == self.num_layers
# + Stem
self.bootstrap = vision.Bootstrap()
self.conv1 = Stem(out_channels[0], stride=2)
self.stage1 = Choice(out_channels[0], out_channels[1], mb=1, ks=3)
dim_in = out_channels[1]
self.feature_dims = [out_channels[-1]]
# + Body
self.layers = []
for name, rep, dim_out, stride in zip(
names, repeats, out_channels[2:], strides):
self.layers.append(select_block(
choices[len(self.layers)]
)(dim_in, dim_out, stride=stride))
if stride == 2:
self.feature_dims.insert(
-1, dim_in * self.layers[-1].mb)
for i in range(rep - 1):
self.layers.append(select_block(
choices[len(self.layers)]
)(dim_out, dim_out, stride=1))
fullname = 'stage%s' % name.split('!')[0]
seq = getattr(self, fullname, [])
seq += self.layers[-rep:]
seq = nn.Sequential(*seq) if '!' in name else seq
setattr(self, fullname, seq)
dim_in = dim_out
self.conv6 = nn.Sequential(*conv_triplet(dim_in, out_channels[-1]))
self.reset_parameters()
def reset_parameters(self):
for m in self.modules():
if nn.is_conv2d(m):
init.kaiming_normal(m.weight, 'fan_out')
if m.bias is not None:
init.constant(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant(m.weight, 1)
elif isinstance(m, nn.Linear):
if m.bias is not None:
init.constant(m.bias, 0)
# Stop the gradients if necessary
def freeze_func(m):
if nn.is_conv2d(m):
m.weight.requires_grad = False
m._buffers['weight'] = m.weight
del m._parameters['weight']
if cfg.MODEL.FREEZE_AT > 0:
self.conv1.apply(freeze_func)
self.stage1.apply(freeze_func)
for i in range(cfg.MODEL.FREEZE_AT, 1, -1):
getattr(self, 'stage{}'.format(i)).apply(freeze_func)
def forward(self, x):
x = self.conv1(x)
x, _ = self.stage1(x)
outputs = []
for layer in self.layers:
x = layer(x)
x, y = x if isinstance(x, tuple) else (x, None)
if y is not None:
outputs.append(y)
outputs.append(self.conv6(x))
return outputs
def make_mobilenet_a1():
return NASMobileNet([
4, 6, 6, 6,
3, 3, 4, 6,
2, 4, 0, 4, 1, 5, 3, 5,
2, 4, 2, 4,
1,
], Setting.PROXYLESS_MOBILE)
def make_mobilenet_v2():
return NASMobileNet([
1, 1,
1, 1, 1,
1, 1, 1, 1, 1, 1, 1,
1, 1, 1,
1,
], Setting.V2)
......@@ -20,12 +20,11 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
from lib.modules import nn
from lib.modules import init
class BasicBlock(torch.nn.Module):
class BasicBlock(nn.Module):
def __init__(
self,
dim_in,
......@@ -35,11 +34,11 @@ class BasicBlock(torch.nn.Module):
dropblock=None,
):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out)
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.Affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out)
self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.Affine(dim_out)
self.downsample = downsample
self.dropblock = dropblock
......@@ -83,12 +82,12 @@ class Bottleneck(torch.nn.Module):
):
super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction)
self.conv1 = conv1x1(dim_in, dim)
self.bn1 = affine(dim)
self.conv2 = conv3x3(dim, dim, stride=stride)
self.bn2 = affine(dim)
self.conv3 = conv1x1(dim, dim_out)
self.bn3 = affine(dim_out)
self.conv1 = nn.Conv1x1(dim_in, dim)
self.bn1 = nn.Affine(dim)
self.conv2 = nn.Conv3x3(dim, dim, stride=stride)
self.bn2 = nn.Affine(dim)
self.conv3 = nn.Conv1x1(dim, dim_out)
self.bn3 = nn.Affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.downsample = downsample
self.dropblock = dropblock
......@@ -133,7 +132,7 @@ class ResNet(torch.nn.Module):
padding=3,
bias=False,
)
self.bn1 = affine(self.dim_in)
self.bn1 = nn.Affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
kernel_size=3,
......@@ -160,13 +159,9 @@ class ResNet(torch.nn.Module):
self.reset_parameters()
def reset_parameters(self):
# The Kaiming Initialization
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_(
m.weight,
nonlinearity='relu',
)
if isinstance(m, nn.Conv2d):
init.kaiming_normal(m.weight)
# Stop the gradients if necessary
def freeze_func(m):
......@@ -184,15 +179,15 @@ class ResNet(torch.nn.Module):
def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None):
downsample = None
if stride != 1 or self.dim_in != dim_out:
downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out),
downsample = nn.Sequential(
nn.Conv1x1(self.dim_in, dim_out, stride=stride),
nn.Affine(dim_out),
)
layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)]
self.dim_in = dim_out
for i in range(1, blocks):
layers.append(block(dim_out, dim_out, dropblock=dropblock))
return torch.nn.Sequential(*layers)
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
......