Commit ca255ea0 by Ting PAN

Change to the PEP8 code style

1 parent 71593766
Showing with 1572 additions and 1414 deletions
## General
# Compiled Object files
*.slo
*.lo
......@@ -7,13 +5,15 @@
*.cuo
# Compiled Dynamic libraries
# *.so
*.so
*.dll
*.dylib
# Compiled Static libraries
*.lai
*.la
#*.a
*.a
*.lib
# Compiled python
*.pyc
......@@ -40,6 +40,9 @@ __pycache__
# QtCreator files
*.user
# VSCode files
.vscode
# PyCharm files
.idea
......
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.1.2 (20190723)
Dragon Minimum Required (Version 0.3.0.0)
Changes:
Preview Features:
- Change to the PEP8 code style.
- Adapt to the new Dragon API.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.1.1 (20190409)
Dragon Minimum Required (Version 0.3.0.0)
......
......@@ -21,8 +21,8 @@ set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
# ---------------- User Config ----------------
# ---[ Dependencies
include(${PROJECT_SOURCE_DIR}/CMake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/CMake/FindNumPy.cmake)
include(${PROJECT_SOURCE_DIR}/cmake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/cmake/FindNumPy.cmake)
FIND_PACKAGE(CUDA REQUIRED)
set(CMAKE_CXX_STANDARD 11)
......
# --------------------------------------------------------
# Detectron @ Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -8,8 +8,3 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from .distort import Distortor
from .expand import Expander
from .sample import Sampler
from .resize import Resizer
\ No newline at end of file
......@@ -16,6 +16,7 @@ from __future__ import print_function
import os
import sys
import time
import cv2
import xml.etree.ElementTree as ET
from dragon.tools.db import LMDB
......@@ -23,6 +24,7 @@ from dragon.tools.db import LMDB
sys.path.insert(0, '../../..')
from lib.proto import anno_pb2 as pb
ZFILL = 8
ENCODE_QUALITY = 95
......@@ -46,14 +48,23 @@ def make_datum(image_file, xml_file):
datum = pb.Datum()
im = cv2.imread(image_file)
if im is None or im.shape[0] == 0 or im.shape[1] == 0:
print("XML have not objects ignored: ", xml_file)
return None
datum.height, datum.width, datum.channels = im.shape
datum.encoded = ENCODE_QUALITY != 100
if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
if im is None or im.shape[0] == 0 or im.shape[1] == 0:
print("XML have not objects ignored: ", xml_file)
return None
datum.data = im.tostring()
anno_datum.datum.CopyFrom(datum)
anno_datum.filename = filename.split('.')[0]
if len(objs) == 0:
return None
for ix, obj in enumerate(objs):
anno = pb.Annotation()
bbox = obj.find('bndbox')
......@@ -64,6 +75,7 @@ def make_datum(image_file, xml_file):
cls = obj.find('name').text.strip()
anno.x1, anno.y1, anno.x2, anno.y2 = (x1, y1, x2, y2)
anno.name = cls
class_name_set.add(cls)
anno.difficult = False
if obj.find('difficult') is not None:
anno.difficult = int(obj.find('difficult').text) == 1
......@@ -72,13 +84,15 @@ def make_datum(image_file, xml_file):
return anno_datum
def make_db(database_file,
def make_db(
database_file,
images_path,
annotations_path,
imagesets_path,
splits):
splits,
):
if os.path.isdir(database_file) is True:
raise ValueError('The database path already exists.')
print('Warning: The database path already exists.')
else:
root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir):
......@@ -95,12 +109,12 @@ def make_db(database_file,
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=10000)
db = LMDB(max_commit=1000)
db.open(database_file, mode='w')
count = 0
total_line = 0
start_time = time.time()
zfill_flag = '{0:0%d}' % (ZFILL)
zfill_flag = '{0:0%d}' % ZFILL
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
......@@ -109,18 +123,18 @@ def make_db(database_file,
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 10000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
datum = make_datum(image_file, xml_file)
if datum is not None:
count += 1
db.put(zfill_flag.format(count - 1), datum.SerializeToString())
if count % 1000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
......
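For reference, a record written by make_db above can be decoded back the same way the DataTransformer later in this commit does. A minimal sketch; the AnnoDatum message name and the LMDB value() accessor are assumptions, the rest mirrors code shown in this diff:

import cv2
import numpy as np
from dragon.tools.db import LMDB
from lib.proto import anno_pb2 as pb

db = LMDB()
db.open('/data/voc_trainval_lmdb')       # hypothetical database path
db.set('0'.zfill(db.zfill()))            # seek to the first zero-padded key
anno_datum = pb.AnnoDatum()              # assumed wrapper message name
anno_datum.ParseFromString(db.value())   # assumed raw-bytes accessor
im = np.fromstring(anno_datum.datum.data, np.uint8)
if anno_datum.datum.encoded:
    im = cv2.imdecode(im, -1)            # JPEG records (ENCODE_QUALITY < 100)
else:
    im = im.reshape((anno_datum.datum.height,
                     anno_datum.datum.width,
                     anno_datum.datum.channels))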
# --------------------------------------------------------
# Detectron
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -155,11 +155,11 @@ __C.TEST.SCORE_THRESH = 0.05
# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
## NMS threshold used on RPN proposals
# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
## Number of top scoring boxes to keep before applying NMS to RPN proposals
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
## Number of top scoring boxes to keep after applying NMS to RPN proposals
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 0
......@@ -199,7 +199,7 @@ __C.MODEL.TYPE = ''
# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE= 'FLOAT32'
__C.MODEL.DATA_TYPE = 'FLOAT32'
# The backbone
__C.MODEL.BACKBONE = ''
......@@ -560,10 +560,11 @@ def _merge_a_into_b(a, b):
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
if not isinstance(a, dict): return
if not isinstance(a, dict):
return
for k, v in a.items():
# a must specify keys that are in b
if not k in b:
if k not in b:
raise KeyError('{} is not a valid config key'.format(k))
# the types must match, too
v = _check_and_coerce_cfg_value_type(v, b[k], k)
......@@ -598,15 +599,15 @@ def cfg_from_list(cfg_list):
assert d.has_key(subkey)
d = d[subkey]
subkey = key_list[-1]
assert d.has_key(subkey)
assert subkey in d
try:
value = literal_eval(v)
except:
# handle the case when v is a string literal
# Handle the case when v is a string literal
value = v
assert type(value) == type(d[subkey]), \
'type {} does not match original type {}'.format(
type(value), type(d[subkey]))
'type {} does not match original type {}'\
.format(type(value), type(d[subkey]))
d[subkey] = value
......@@ -618,8 +619,10 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key):
# The types must match (with some exceptions)
type_b = type(value_b)
type_a = type(value_a)
if type_a is type_b: return value_a
if type_b is float and type_a is int: return float(value_a)
if type_a is type_b:
return value_a
if type_b is float and type_a is int:
return float(value_a)
# Exceptions: numpy arrays, strings, tuple<->list
if isinstance(value_b, np.ndarray):
......
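Outside the cfg module, the clobbering merge above boils down to a few lines of plain Python; a standalone sketch with illustrative values (type coercion omitted):

def merge_a_into_b(a, b):
    # Clobber options in b whenever they are also specified in a;
    # a may only specify keys that already exist in b.
    if not isinstance(a, dict):
        return
    for k, v in a.items():
        if k not in b:
            raise KeyError('{} is not a valid config key'.format(k))
        if isinstance(v, dict):
            merge_a_into_b(v, b[k])
        else:
            b[k] = v

defaults = {'SOLVER': {'BASE_LR': 0.001, 'MAX_ITERS': 40000}}
overrides = {'SOLVER': {'BASE_LR': 0.01}}
merge_a_into_b(overrides, defaults)
assert defaults['SOLVER'] == {'BASE_LR': 0.01, 'MAX_ITERS': 40000}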
......@@ -18,7 +18,8 @@ import shutil
import time
import numpy as np
from lib.core.config import cfg, cfg_from_file
from lib.core.config import cfg
from lib.core.config import cfg_from_file
class Coordinator(object):
......@@ -44,7 +45,8 @@ class Coordinator(object):
def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path): os.makedirs(path)
if auto_create and not os.path.exists(path):
os.makedirs(path)
return path
def checkpoints_dir(self):
......@@ -67,8 +69,10 @@ class Coordinator(object):
return os.path.join(self.checkpoints_dir(), files[ix]), step
steps.append(step)
if global_step is None:
if len(files) == 0: return None, 0
last_idx = int(np.argmax(steps)); last_step = steps[last_idx]
if len(files) == 0:
return None, 0
last_idx = int(np.argmax(steps))
last_step = steps[last_idx]
return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step
return None, 0
result = locate()
......
......@@ -30,7 +30,8 @@ class Solver(object):
self.opt_arguments = {
'scale_gradient': 1. / (
cfg.SOLVER.LOSS_SCALING *
cfg.SOLVER.ITER_SIZE),
cfg.SOLVER.ITER_SIZE
),
'clip_gradient': float(cfg.SOLVER.CLIP_NORM),
'weight_decay': cfg.SOLVER.WEIGHT_DECAY,
}
......@@ -57,8 +58,10 @@ class Solver(object):
}
]
for name, param in self.detector.named_parameters():
if 'bias' in name: param_groups[1]['params'].append(param)
else: param_groups[0]['params'].append(param)
if 'bias' in name:
param_groups[1]['params'].append(param)
else:
param_groups[0]['params'].append(param)
return param_groups
def set_learning_rate(self):
......@@ -67,8 +70,10 @@ class Solver(object):
if self._current_step < len(cfg.SOLVER.STEPS) \
and self.iter >= cfg.SOLVER.STEPS[self._current_step]:
self._current_step = self._current_step + 1
logger.info('MultiStep Status: Iteration {}, step = {}' \
.format(self.iter, self._current_step))
logger.info(
'MultiStep Status: Iteration {}, step = {}'
.format(self.iter, self._current_step)
)
new_lr = cfg.SOLVER.BASE_LR * (
cfg.SOLVER.GAMMA ** self._current_step)
self.optimizer.param_groups[0]['lr'] = \
......@@ -77,13 +82,14 @@ class Solver(object):
raise ValueError('Unknown lr policy: ' + policy)
def one_step(self):
def add_loss(x, y):
return y if x is None else x + y
# Forward & Backward & Compute_loss
iter_size = cfg.SOLVER.ITER_SIZE
loss_scaling = cfg.SOLVER.LOSS_SCALING
run_time = 0.; stats = {'loss': {'total': 0.}, 'iter': self.iter}
add_loss = lambda x, y: y if x is None else x + y
stats = {'loss': {'total': 0.}, 'iter': self.iter}
tic = time.time()
run_time, tic = 0., time.time()
if iter_size > 1:
# Dragon is designed for manual gradient accumulation
......@@ -99,10 +105,13 @@ class Solver(object):
stats['loss'][k] = 0.
total_loss = add_loss(total_loss, v)
stats['loss'][k] += float(v) * loss_scaling
if loss_scaling != 1.: total_loss *= loss_scaling
if loss_scaling != 1.:
total_loss *= loss_scaling
stats['loss']['total'] += float(total_loss)
total_loss.backward()
if iter_size > 1: self.optimizer.accumulate_grad()
if iter_size > 1:
self.optimizer.accumulate_grad()
run_time += (time.time() - tic)
......@@ -190,5 +199,8 @@ def get_solver_func(type):
elif type == 'Adam':
return AdamSolver
else:
raise ValueError('Unsupported solver type: {}.\n'
'Expected one of (MomentumSGD, Nesterov, RMSProp, Adam)'.format(type))
\ No newline at end of file
raise ValueError(
'Unsupported solver type: {}.\n'
'Expected one of (MomentumSGD, Nesterov, RMSProp, Adam).'
.format(type)
)
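The scale_gradient argument built in the Solver constructor above undoes both loss scaling and gradient accumulation; a small worked example with illustrative values:

# With ITER_SIZE micro-batches and each loss multiplied by LOSS_SCALING,
# the accumulated gradient is ITER_SIZE * LOSS_SCALING times too large.
loss_scaling, iter_size = 128., 2
scale_gradient = 1. / (loss_scaling * iter_size)   # 1 / 256
accumulated_grad = 256.
assert accumulated_grad * scale_gradient == 1.     # back to the true scale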
......@@ -33,25 +33,27 @@ class TestServer(object):
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = DataReader(**{'source': self.imdb.source})
self.data_transformer = DataTransformer()
self.data_reader.Q_out = Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.q_out = Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.start()
self.gt_recs = OrderedDict()
self.output_dir = output_dir
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): os.makedirs(self.vis_dir)
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
serialized = self.data_reader.Q_out.get()
serialized = self.data_reader.q_out.get()
image = self.data_transformer.get_image(serialized)
image_id, objects = self.data_transformer.get_annotations(serialized)
self.gt_recs[image_id] = {
'objects': objects,
'width': image.shape[1],
'height': image.shape[0]}
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
......@@ -60,9 +62,10 @@ class TestServer(object):
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError('Loading {} records, '
'while the specific database required {}'.format(
len(self.gt_recs), self.num_images))
raise RuntimeError(
'Loading {} records, while {} required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
......@@ -87,7 +90,8 @@ class InferServer(object):
self.image_idx = 0
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): os.makedirs(self.vis_dir)
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
......@@ -99,7 +103,8 @@ class InferServer(object):
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = {
'width': image.shape[1],
'height': image.shape[0]}
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
......@@ -108,15 +113,23 @@ class InferServer(object):
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError('Loading {} records, '
'while the specific database required {}'.format(
len(self.gt_recs), self.num_images))
raise RuntimeError(
'Loading {} records, while {} required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes, self.get_records(), self.output_dir)
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes, all_masks, self.get_records(), self.output_dir)
\ No newline at end of file
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
......@@ -17,17 +17,17 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import collections
import datetime
from collections import OrderedDict
import os
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.core.solver import get_solver_func
from lib.utils.timer import Timer
from lib.utils.stats import SmoothedValue
from lib.utils import logger
from lib.utils.stats import SmoothedValue
from lib.utils.timer import Timer
class SolverWrapper(object):
......@@ -51,13 +51,14 @@ class SolverWrapper(object):
self.solver.detector.cuda(cfg.GPU_ID)
# Plan the metrics
self.metrics = OrderedDict()
self.metrics = collections.OrderedDict()
if cfg.ENABLE_TENSOR_BOARD:
from dragon.tools.tensorboard import TensorBoard
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs')
def snapshot(self):
if not logger.is_root(): return None
if not logger.is_root():
return None
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth')
filename = os.path.join(self.output_dir, filename)
......@@ -77,19 +78,35 @@ class SolverWrapper(object):
self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items():
if k == 'total':
self.board.scalar_summary('total_loss', v.GetMedianValue(), stats['iter'])
else: self.board.scalar_summary(k, v.GetMedianValue(), stats['iter'])
self.board.scalar_summary(
'total_loss',
v.GetMedianValue(),
stats['iter'],
)
else:
self.board.scalar_summary(
k,
v.GetMedianValue(),
stats['iter'],
)
def step(self, display=False):
stats = self.solver.one_step()
self.add_metrics(stats)
self.send_metrics(stats)
if display:
logger.info('Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (stats['iter'],
stats['lr'], self.metrics['total'].GetMedianValue(), stats['time']))
logger.info(
'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (
stats['iter'], stats['lr'],
self.metrics['total'].GetMedianValue(),
stats['time'],
)
)
for k, v in self.metrics.items():
if k == 'total': continue
logger.info(' Train net output({}): {}'.format(k, v.GetMedianValue()))
if k == 'total':
continue
logger.info(' ' * 10 + 'Train net output({}): {}'
.format(k, v.GetMedianValue()))
def train_model(self):
"""Network training loop."""
......@@ -104,9 +121,8 @@ class SolverWrapper(object):
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
# Apply 1-step SGD update
timer.tic()
with timer.tic_and_toc():
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0)
timer.toc()
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time
......@@ -114,8 +130,10 @@ class SolverWrapper(object):
cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
logger.info('< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta))
logger.info(
'< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta)
)
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0:
last_snapshot_iter = self.solver.iter
......
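The progress line above is plain arithmetic over the smoothed timer; a self-contained sketch with illustrative numbers:

import datetime

average_time = 0.35                    # smoothed seconds per iteration
max_iters, cur_iter = 40000, 12000     # illustrative values
eta_seconds = average_time * (max_iters - cur_iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(cur_iter + 1) / max_iters
print('< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
      .format(progress, average_time, eta))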
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -13,6 +13,10 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from lib.datasets.taas import TaaS
......
......@@ -61,7 +61,7 @@ class imdb(object):
return num_entries
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
raise NotImplementedError
pass
def evaluate_masks(self, all_boxes, all_masks, output_dir):
raise NotImplementedError
\ No newline at end of file
pass
......@@ -19,16 +19,16 @@ from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle
except:
import pickle as cPickle
from lib.core.config import cfg
from lib.utils.mask_transform import mask_overlap
from lib.utils.boxes import expand_boxes
from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils.boxes import expand_boxes
from lib.utils.mask_transform import mask_overlap
def voc_ap(rec, prec, use_07_metric=False):
......@@ -65,8 +65,13 @@ def voc_ap(rec, prec, use_07_metric=False):
return ap
def voc_bbox_eval(det_file, gt_recs, cls_name,
IoU=0.5, use_07_metric=False):
def voc_bbox_eval(
det_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
class_recs = {}
n_pos = 0
for image_name, rec in gt_recs.items():
......@@ -81,35 +86,35 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
'det': det
}
# read detections
with open(det_file, 'r') as f: lines = f.readlines()
# Read detections
with open(det_file, 'r') as f:
lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# avoid IndexError if detecting nothing
if len(BB) == 0: return 0, 0, -1
# Avoid IndexError if detecting nothing
if len(BB) == 0:
return 0, 0, -1
# sort by confidence
# Sort by confidence
sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind]
# go down dets and mark TPs and FPs
# Go down detections and mark TPs and FPs
nd = len(image_ids)
tp = np.zeros(nd)
fp = np.zeros(nd)
tp, fp = np.zeros(nd), np.zeros(nd)
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
ovmax = -np.inf
ovmax, jmax = -np.inf, 0
BBGT = R['bbox'].astype(float)
if BBGT.size > 0:
# compute overlaps
# intersection
# Compute overlaps: intersection first
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
......@@ -118,7 +123,7 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih
# union
# Union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
......@@ -149,8 +154,14 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
return rec, prec, ap
def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
IoU=0.5, use_07_metric=False):
def voc_segm_eval(
det_file,
seg_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
# 0. Constants
M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH
......@@ -175,8 +186,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
image_names.append(image_name)
# 2. Get predict pickle file for this class
with open(det_file, 'rb') as f: boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f: masks_pkl = cPickle.load(f)
with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f)
# 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs)
......@@ -185,7 +198,8 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
box_num += len(boxes_pkl[im_i])
# avoid IndexError if detecting nothing
if box_num == 0: return 0, 0, -1
if box_num == 0:
return 0, 0, -1
# 4. Re-organize all the predicted boxes
new_boxes = np.zeros((box_num, 5))
......@@ -223,11 +237,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
fp[i] = 1
continue
R = class_recs[image_name]
im_h, im_w = \
gt_recs[image_name]['height'], \
gt_recs[image_name]['width']
im_h = gt_recs[image_name]['height']
im_w = gt_recs[image_name]['width']
# decode mask
# Decode mask
ref_box = ref_boxes[i, :4]
mask = new_masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
......@@ -244,14 +257,14 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])]
# calculate max region overlap
ovmax = -1; jmax = -1
# Calculate max region overlap
ovmax, jmax = -1, -1
for j in range(len(R['det'])):
gt_mask_bound = R['bbox'][j].astype(int)
pred_mask_bound = new_boxes[i, :4].astype(int)
crop_mask = R['mask'][j][gt_mask_bound[1] : gt_mask_bound[3] + 1,
gt_mask_bound[0] : gt_mask_bound[2] + 1]
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
......
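The +1 terms in the intersection and union arithmetic above follow the inclusive-pixel convention of PASCAL VOC (a box from x1 to x2 covers x2 - x1 + 1 pixels); a standalone sketch of the same overlap computation:

import numpy as np

def voc_iou(bb, BBGT):
    # Inclusive-pixel IoU, matching the +1 convention used above.
    ixmin = np.maximum(BBGT[:, 0], bb[0])
    iymin = np.maximum(BBGT[:, 1], bb[1])
    ixmax = np.minimum(BBGT[:, 2], bb[2])
    iymax = np.minimum(BBGT[:, 3], bb[3])
    iw = np.maximum(ixmax - ixmin + 1., 0.)
    ih = np.maximum(iymax - iymin + 1., 0.)
    inters = iw * ih
    uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
           (BBGT[:, 2] - BBGT[:, 0] + 1.) *
           (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
    return inters / uni

# Two identical 10x10 boxes overlap perfectly:
assert voc_iou(np.array([0., 0., 9., 9.]),
               np.array([[0., 0., 9., 9.]]))[0] == 1.0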
......@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.proposal_layer import ProposalLayer
from lib.faster_rcnn.layers.proposal_target_layer import ProposalTargetLayer
......@@ -13,21 +13,21 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from multiprocessing import Process
from lib.core.config import cfg
from lib.utils.blob import im_list_to_blob
class BlobFetcher(Process):
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self.Q1_in = self.Q2_in = self.Q_out = None
self.q1_in = self.q2_in = self.q_out = None
self.daemon = True
def get(self, Q_in):
processed_ims = []; ims_info = []; all_boxes = []
processed_ims, ims_info, all_boxes = [], [], []
for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = Q_in.get()
processed_ims.append(im)
......@@ -46,7 +46,7 @@ class BlobFetcher(Process):
def run(self):
while True:
if self.Q1_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q_out.put(self.get(self.Q1_in))
elif self.Q2_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q_out.put(self.get(self.Q2_in))
\ No newline at end of file
if self.q1_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.q_out.put(self.get(self.q1_in))
elif self.q2_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.q_out.put(self.get(self.q2_in))
......@@ -13,16 +13,17 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Queue
import time
import dragon
import pprint
from multiprocessing import Queue
import dragon.core.mpi as mpi
from lib.core.config import cfg
import lib.utils.logger as logger
from lib.faster_rcnn.data.data_reader import DataReader
from lib.faster_rcnn.data.data_transformer import DataTransformer
from lib.faster_rcnn.data.blob_fetcher import BlobFetcher
from lib.utils import logger
class DataBatch(object):
......@@ -53,13 +54,14 @@ class DataBatch(object):
super(DataBatch, self).__init__()
# Init mpi
global_rank, local_rank, group_size = 0, 0, 1
if mpi.Is_Init():
idx, group = mpi.AllowParallel()
if idx != -1: # DataParallel
global_rank = mpi.Rank()
if dragon.mpi.is_init():
group = dragon.mpi.is_parallel()
if group is not None: # DataParallel
global_rank = dragon.mpi.rank()
group_size = len(group)
for i, node in enumerate(group):
if global_rank == node: local_rank = i
if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size
# Configuration
......@@ -89,7 +91,7 @@ class DataBatch(object):
self._readers = []
for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].Q_out = self.Q1
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
......@@ -106,9 +108,9 @@ class DataBatch(object):
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer.Q_in = self.Q1
transformer.Q1_out = self.Q21
transformer.Q2_out = self.Q22
transformer.q_in = self.Q1
transformer.q1_out = self.Q21
transformer.q2_out = self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
......@@ -117,15 +119,17 @@ class DataBatch(object):
self._fetchers = []
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.Q1_in = self.Q21
fetcher.Q2_in = self.Q22
fetcher.Q_out = self.Q3
fetcher.q1_in = self.Q21
fetcher.q2_in = self.Q22
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Prevent echoing from multiple nodes
if local_rank == 0: self.echo()
if local_rank == 0:
self.echo()
def cleanup():
def terminate(processes):
for process in processes:
......@@ -137,6 +141,7 @@ class DataBatch(object):
logger.info('Terminating DataTransformer ......')
terminate(self._readers)
logger.info('Terminating DataReader......')
import atexit
atexit.register(cleanup)
......
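The queues renamed above wire a three-stage pipeline: DataReader feeds Q1, each DataTransformer routes samples into Q21 or Q22 by aspect ratio, and BlobFetcher batches from whichever queue holds a full batch, so every blob contains images of consistent orientation. A minimal sketch of the routing rule:

from multiprocessing import Queue

q1_out, q2_out = Queue(), Queue()   # portrait / landscape queues

def route(im, im_scale, gt_boxes):
    # Same rule as DataTransformer.run(): aspect ratio > 1 means
    # height > width, so the sample goes to the portrait queue.
    aspect_ratio = float(im.shape[0]) / im.shape[1]
    if aspect_ratio > 1.0:
        q1_out.put((im, im_scale, gt_boxes))
    else:
        q2_out.put((im, im_scale, gt_boxes))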
......@@ -14,21 +14,17 @@ from __future__ import division
from __future__ import print_function
import math
import numpy
import multiprocessing
import numpy
from dragon import config as _cfg
from dragon.tools import db as _db
from dragon.tools import db
from lib.core.config import cfg
class DataReader(multiprocessing.Process):
"""DataReader is deployed to queue encoded str from `LMDB`_.
It adaptively partitions and shuffles records over all distributed nodes.
"""Collect encoded str from `LMDB`_.
"""
def __init__(self, **kwargs):
"""Construct a ``DataReader``.
Partition and shuffle records over distributed nodes.
Parameters
----------
......@@ -40,14 +36,20 @@ class DataReader(multiprocessing.Process):
The number of chunks to split.
"""
def __init__(self, **kwargs):
"""Create a DataReader."""
super(DataReader, self).__init__()
self._source = kwargs.get('source', '')
self._use_shuffle = kwargs.get('shuffle', False)
self._num_chunks = kwargs.get('num_chunks', 2048)
self._part_idx, self._num_parts = 0, 1
self._cursor, self._chunk_cursor = 0, 0
self._rng_seed = _cfg.GetRandomSeed()
self.Q_out = None
self._chunk_size, self._perm_size = 0, 0
self._head, self._tail, self._num_entries = 0, 0, 0
self._db, self._zfill, self._perm = None, None, None
self._rng_seed = cfg.RNG_SEED
self.q_out = None
self.daemon = True
def element(self):
......@@ -69,10 +71,6 @@ class DataReader(multiprocessing.Process):
target : int
The key of the record.
Returns
-------
None
Notes
-----
The redirection reopens the database.
......@@ -88,17 +86,12 @@ class DataReader(multiprocessing.Process):
self._db.set(str(target).zfill(self._zfill))
def reset(self):
"""Reset the cursor and environment.
Returns
-------
None
"""
"""Reset the cursor and environment."""
if self._num_parts > 1 or self._use_shuffle:
self._chunk_cursor = 0
self._part_idx = (self._part_idx + 1) % self._num_parts
if self._use_shuffle: self._perm = numpy.random.permutation(self._perm_size)
if self._use_shuffle:
self._perm = numpy.random.permutation(self._perm_size)
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._tail = self._head * self._chunk_size
if self._head >= self._num_entries: self.next_chunk()
......@@ -109,26 +102,15 @@ class DataReader(multiprocessing.Process):
self.redirect(self._head)
def next_record(self):
"""Step the cursor of records.
Returns
-------
None
"""
"""Step the cursor of records."""
self._db.next()
self._cursor += 1
def next_chunk(self):
"""Step the cursor of shuffling chunks.
Returns
-------
None
"""
"""Step the cursor of chunks."""
self._chunk_cursor += 1
if self._chunk_cursor >= self._perm_size: self.reset()
if self._chunk_cursor >= self._perm_size:
self.reset()
else:
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._head = self._head * self._chunk_size
......@@ -140,18 +122,12 @@ class DataReader(multiprocessing.Process):
self.redirect(self._head)
def run(self):
"""Start the process.
Returns
-------
None
"""
"""Start the process."""
# Fix seed
numpy.random.seed(self._rng_seed)
# Init db
self._db = _db.LMDB()
self._db = db.LMDB()
self._db.open(self._source)
self._zfill = self._db.zfill()
self._num_entries = self._db.num_entries()
......@@ -189,9 +165,10 @@ class DataReader(multiprocessing.Process):
# Run!
while True:
self.Q_out.put(self.element())
self.q_out.put(self.element())
self.next_record()
if self._cursor >= self._tail:
if self._num_parts > 1 or self._use_shuffle:
self.next_chunk()
else: self.reset()
\ No newline at end of file
else:
self.reset()
......@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Process
import multiprocessing
import numpy as np
import numpy.random as npr
......@@ -28,11 +28,11 @@ except ImportError as e:
from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.utils import logger
from lib.utils.blob import prep_im_for_blob
import lib.utils.logger as logger
class DataTransformer(Process):
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._rng_seed = cfg.RNG_SEED
......@@ -42,60 +42,64 @@ class DataTransformer(Process):
self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._queues = []
self.Q_in = self.Q1_out = self.Q2_out = None
self.q_in = self.q1_out = self.q2_out = None
self.daemon = True
def make_record(
def make_roi_dict(
self,
ann_datum,
im_scale,
flip=False,
apply_flip=False,
offsets=None,
):
annotations = ann_datum.annotation
n_objects = 0
if not self._use_diff:
for ann in annotations:
if not ann.difficult: n_objects += 1
else: n_objects = len(annotations)
if not ann.difficult:
n_objects += 1
else:
n_objects = len(annotations)
record = {
roi_dict = {
'width': ann_datum.datum.width,
'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32),
'boxes': np.zeros((n_objects, 4), dtype=np.float32),
'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), 'float32'),
}
# Filter the difficult instances
instance_idx = 0
rec_idx = 0
for ann in annotations:
if not self._use_diff and ann.difficult: continue
record['boxes'][instance_idx, :] = [
if not self._use_diff and ann.difficult:
continue
roi_dict['boxes'][rec_idx, :] = [
max(0, ann.x1),
max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1),
]
record['gt_classes'][instance_idx] = self._class_to_ind[ann.name]
instance_idx += 1
roi_dict['gt_classes'][rec_idx] = self._class_to_ind[ann.name]
rec_idx += 1
# Flip the boxes if necessary
if flip:
record['boxes'] = _flip_boxes(
record['boxes'], record['width'])
if apply_flip:
roi_dict['boxes'] = _flip_boxes(
roi_dict['boxes'], roi_dict['width'])
# Scale the boxes to the detection scale
record['boxes'] *= im_scale
roi_dict['boxes'] *= im_scale
# Apply the offsets from scale jitter
if offsets is not None:
record['boxes'][:, 0::2] += offsets[0]
record['boxes'][:, 1::2] += offsets[1]
record['boxes'][:, :] = np.minimum(
np.maximum(record['boxes'][:, :], 0),
[offsets[2][1] - 1, offsets[2][0] - 1] * 2)
roi_dict['boxes'][:, 0::2] += offsets[0]
roi_dict['boxes'][:, 1::2] += offsets[1]
roi_dict['boxes'][:, :] = np.minimum(
np.maximum(roi_dict['boxes'][:, :], 0),
[offsets[2][1] - 1, offsets[2][0] - 1] * 2,
)
return record
return roi_dict
@classmethod
def get_image(cls, serialized):
......@@ -127,20 +131,23 @@ class DataTransformer(Process):
datum.ParseFromString(serialized)
im_datum = datum.datum
im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True: im = cv2.imdecode(im, -1)
else: im = im.reshape((im_datum.height, im_datum.width, im_datum.channels))
if im_datum.encoded is True:
im = cv2.imdecode(im, -1)
else:
h, w = im_datum.height, im_datum.width
im = im.reshape((h, w, im_datum.channels))
# Scale
scale_indices = npr.randint(0, high=len(cfg.TRAIN.SCALES))
scale_indices = npr.randint(len(cfg.TRAIN.SCALES))
target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE)
# Flip
flip = False
apply_flip = False
if self._use_flipped:
if npr.randint(0, 2) > 0:
im = im[:, ::-1, :]
flip = True
apply_flip = True
# Random Crop or RandomPad
offsets = None
......@@ -153,57 +160,63 @@ class DataTransformer(Process):
# To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im)
# Datum -> Record
rec = self.make_record(datum, im_scale, flip, offsets)
# Datum -> RoIDict
roi_dict = self.make_roi_dict(datum, im_scale, apply_flip, offsets)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(rec['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, 0:4], gt_boxes[:, 4] = rec['boxes'], rec['gt_classes']
gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
return im, im_scale, gt_boxes
def run(self):
npr.seed(self._rng_seed)
while True:
serialized = self.Q_in.get()
serialized = self.q_in.get()
data = self.get(serialized)
# Ensure that there is at least one ground-truth box
if len(data[2]) < 1: continue
if len(data[2]) < 1:
continue
aspect_ratio = float(data[0].shape[0]) / data[0].shape[1]
if aspect_ratio > 1.0: self.Q1_out.put(data)
else: self.Q2_out.put(data)
if aspect_ratio > 1.0:
self.q1_out.put(data)
else:
self.q2_out.put(data)
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
oldx1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - oldx2 - 1
flip_boxes[:, 2] = width - oldx1 - 1
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
def _get_image_with_target_size(target_size, im):
im_shape = list(im.shape)
width_diff = target_size[1] - im_shape[1]
offset_crop_width = np.random.randint(0, max(-width_diff, 0) + 1)
offset_pad_width = np.random.randint(0, max(width_diff, 0) + 1)
def _get_image_with_target_size(target_size, img):
im_shape = list(img.shape)
height_diff = target_size[0] - im_shape[0]
offset_crop_height = np.random.randint(0, max(-height_diff, 0) + 1)
offset_pad_height = np.random.randint(0, max(height_diff, 0) + 1)
im_shape[0 : 2] = target_size
new_im = np.empty(im_shape, dtype=im.dtype)
new_im[:] = cfg.PIXEL_MEANS
new_im[offset_pad_height:offset_pad_height + im.shape[0],
offset_pad_width:offset_pad_width + im.shape[1]] = \
im[offset_crop_height:offset_crop_height + target_size[0],
offset_crop_width:offset_crop_width + target_size[1]]
width_diff = target_size[1] - im_shape[1]
return new_im, (offset_pad_width - offset_crop_width,
offset_pad_height - offset_crop_height, target_size)
\ No newline at end of file
ofs_crop_width = np.random.randint(max(-width_diff, 0) + 1)
ofs_pad_width = np.random.randint(max(width_diff, 0) + 1)
ofs_crop_height = np.random.randint(max(-height_diff, 0) + 1)
ofs_pad_height = np.random.randint(max(height_diff, 0) + 1)
im_shape[:2] = target_size
new_img = np.empty(im_shape, dtype=img.dtype)
new_img[:] = cfg.PIXEL_MEANS
new_img[ofs_pad_height:ofs_pad_height + img.shape[0],
ofs_pad_width:ofs_pad_width + img.shape[1]] = \
img[ofs_crop_height:ofs_crop_height + target_size[0],
ofs_crop_width:ofs_crop_width + target_size[1]]
return new_img, (
ofs_pad_width - ofs_crop_width,
ofs_pad_height - ofs_crop_height,
target_size,
)
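A quick worked check of _flip_boxes above, assuming an image of width 100:

import numpy as np

boxes = np.array([[10., 5., 29., 25.]])   # x1, y1, x2, y2
width = 100
flipped = boxes.copy()
flipped[:, 0] = width - boxes[:, 2] - 1   # 100 - 29 - 1 = 70
flipped[:, 2] = width - boxes[:, 0] - 1   # 100 - 10 - 1 = 89
# The inclusive 20-pixel width (29 - 10 + 1) is preserved: 89 - 70 + 1 == 20
assert flipped[0, 2] - flipped[0, 0] == boxes[0, 2] - boxes[0, 0]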
......@@ -32,7 +32,7 @@ import numpy as np
# -79 -167 96 184
# -167 -343 184 360
#array([[ -83., -39., 100., 56.],
# array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
......@@ -42,8 +42,12 @@ import numpy as np
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6)):
def generate_anchors(
base_size=16,
ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6),
):
"""
Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, 15, 15) window.
......@@ -55,22 +59,25 @@ def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
return anchors
def generate_anchors_v2(stride=16, ratios=(0.5, 1, 2),
sizes=(32, 64, 128, 256, 512)):
def generate_anchors_v2(
stride=16,
ratios=(0.5, 1, 2),
sizes=(32, 64, 128, 256, 512),
):
"""
Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
are centered on stride / 2, have (approximate) sqrt areas of the specified
sizes, and aspect ratios as given.
"""
return generate_anchors(stride, ratios,
np.array(sizes, dtype=np.float) / stride)
return generate_anchors(
base_size=stride,
ratios=ratios,
scales=np.array(sizes, dtype=np.float) / stride,
)
def _whctrs(anchor):
"""
Return width, height, x center, and y center for an anchor (window).
"""
"""Return width, height, x center, and y center for an anchor (window)."""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
......@@ -83,7 +90,6 @@ def _mkanchors(ws, hs, x_ctr, y_ctr):
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
......@@ -94,10 +100,7 @@ def _mkanchors(ws, hs, x_ctr, y_ctr):
def _ratio_enum(anchor, ratios):
"""
Enumerate a set of anchors for each aspect ratio wrt an anchor.
"""
"""Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
......@@ -108,10 +111,7 @@ def _ratio_enum(anchor, ratios):
def _scale_enum(anchor, scales):
"""
Enumerate a set of anchors for each scale wrt an anchor.
"""
"""Enumerate a set of anchors for each scale wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
......
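The commented anchor array at the top of this file can be reproduced by calling the function with its defaults; a minimal usage sketch:

import numpy as np
from lib.faster_rcnn.generate_anchors import generate_anchors

# 3 aspect ratios x 3 scales around the (0, 0, 15, 15) reference window
# yield the 9 anchors listed in the comment block above.
anchors = generate_anchors(base_size=16,
                           ratios=(0.5, 1, 2),
                           scales=2 ** np.arange(3, 6))
assert anchors.shape == (9, 4)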
......@@ -19,9 +19,10 @@ import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.blob import to_tensor
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors
......@@ -32,10 +33,9 @@ class AnchorTargetLayer(torch.nn.Module):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
# C4 backbone takes the first stride
self.scales, self.stride, self.ratios = \
cfg.RPN.SCALES, \
cfg.RPN.STRIDES[0], \
cfg.RPN.ASPECT_RATIOS
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
......@@ -61,11 +61,13 @@ class AnchorTargetLayer(torch.nn.Module):
"""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \
.format(num_images, len(gt_boxes_wide)))
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
height, width = features[0].shape[-2:]
......@@ -85,7 +87,7 @@ class AnchorTargetLayer(torch.nn.Module):
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
# label: 1 is positive, 0 is negative, -1 is dont care
# label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
......@@ -101,8 +103,8 @@ class AnchorTargetLayer(torch.nn.Module):
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] # height
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
......@@ -143,7 +145,10 @@ class AnchorTargetLayer(torch.nn.Module):
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
......@@ -152,12 +157,17 @@ class AnchorTargetLayer(torch.nn.Module):
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False)
bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], 0:4])
ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], 0:4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
......@@ -169,34 +179,26 @@ class AnchorTargetLayer(torch.nn.Module):
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
# labels
labels = all_labels.reshape(
(num_images, height, width, A)).transpose(0, 3, 1, 2)
labels = labels.reshape((num_images, total_anchors))
labels = all_labels \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors))
# bbox_targets
bbox_targets = all_bbox_targets.reshape(
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2)
bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
# bbox_inside_weights
bbox_inside_weights = all_bbox_inside_weights.reshape(
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
# bbox_outside_weights
bbox_outside_weights = all_bbox_outside_weights.reshape(
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights),
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
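The removed _dismantle_gt_boxes helper (now imported as dismantle_gt_boxes from lib.utils.boxes) splits the packed gt-boxes blob by the image index carried in its last column; a standalone sketch of the same behavior:

import numpy as np

def dismantle_gt_boxes(gt_boxes, num_images):
    # Group rows by the image index stored in the last column.
    return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]]
            for ix in range(num_images)]

packed = np.array([[0., 0., 9., 9., 1., 0.],      # box of image 0
                   [5., 5., 19., 19., 2., 1.]])   # box of image 1
per_image = dismantle_gt_boxes(packed, num_images=2)
assert len(per_image) == 2 and per_image[1].shape == (1, 6)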
......@@ -33,10 +33,8 @@ class DataLayer(torch.nn.Module):
})
def forward(self):
# Get a mini-batch from the Queue
blobs = self.data_batch.get()
# Zero-Copy from numpy
blobs['data'] = torch.from_numpy(blobs['data'])
# Switch the data to Device
blobs['data'].cuda(cfg.GPU_ID)
return blobs
\ No newline at end of file
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
......@@ -9,27 +9,35 @@
#
# --------------------------------------------------------
import numpy as np
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.nms.nms_wrapper import nms
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils.bbox_transform import bbox_transform_inv, clip_boxes
from lib.nms.nms_wrapper import nms
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales, self.stride, self.ratios = \
cfg.RPN.SCALES, cfg.RPN.STRIDES[0], cfg.RPN.ASPECT_RATIOS
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = generate_anchors(
......@@ -61,7 +69,8 @@ class ProposalLayer(torch.nn.Module):
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
anchors = self.base_anchors.reshape((1, A, 4)) + \
anchors = \
self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4))
......@@ -69,8 +78,6 @@ class ProposalLayer(torch.nn.Module):
batch_rois = []
# scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
......@@ -95,11 +102,11 @@ class ProposalLayer(torch.nn.Module):
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_boxes(proposals, ims_info[ix, :2])
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2])
keep = _filter_boxes(proposals, min_size * ims_info[ix, 2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
......@@ -107,7 +114,8 @@ class ProposalLayer(torch.nn.Module):
# 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: keep = keep[:post_nms_topN]
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
# Output rois blob
......@@ -118,13 +126,7 @@ class ProposalLayer(torch.nn.Module):
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN': return rpn_rois
else: return [to_tensor(rpn_rois)]
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size."""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
if cfg_key == 'TRAIN':
return rpn_rois
else:
return [blob_to_tensor(rpn_rois)]
......@@ -9,22 +9,24 @@
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""Assign object detection proposals to ground-truth targets."""
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
......@@ -34,8 +36,8 @@ class ProposalTargetLayer(torch.nn.Module):
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
......@@ -50,14 +52,12 @@ class ProposalTargetLayer(torch.nn.Module):
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
......@@ -73,11 +73,11 @@ class ProposalTargetLayer(torch.nn.Module):
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return {
'rois': [to_tensor(batch_outputs['rois'])],
'labels': to_tensor(batch_outputs['labels']),
'bbox_targets': to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': to_tensor(batch_outputs['bbox_outside_weights']),
'rois': [blob_to_tensor(batch_outputs['rois'])],
'labels': blob_to_tensor(batch_outputs['labels']),
'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']),
}
......@@ -109,7 +109,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
......@@ -117,12 +116,18 @@ def _compute_targets(ex_rois, gt_rois, labels):
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Generate a random sample of RoIs comprising foreground and background examples."""
def _sample_rois(
all_rois,
gt_boxes,
fg_rois_per_image,
rois_per_image,
num_classes,
):
"""Generate a random sample of RoIs."""
overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float),
)
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
......@@ -164,11 +169,6 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas
return labels, rois, bbox_targets, bbox_inside_weights
def _dismantle_gt_boxes(gt_boxes, num_images):
return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]] \
for ix in range(num_images)]
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
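For context, the sample sizes used by _sample_rois come from two config values; a worked example with illustrative TRAIN.BATCH_SIZE = 128 and TRAIN.FG_FRACTION = 0.25:

import numpy as np

rois_per_image = 128     # cfg.TRAIN.BATCH_SIZE (illustrative)
fg_fraction = 0.25       # cfg.TRAIN.FG_FRACTION (illustrative)
fg_rois_per_image = np.round(fg_fraction * rois_per_image)   # 32 foreground
bg_rois_per_image = rois_per_image - fg_rois_per_image       # 96 background
assert fg_rois_per_image + bg_rois_per_image == rois_per_image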
......@@ -13,27 +13,23 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
import cPickle
except:
import pickle as cPickle
import numpy as np
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image
from lib.utils.bbox_transform import clip_boxes, bbox_transform_inv
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer
from lib.utils.blob import im_list_to_blob, to_array
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales.
"""
"""Detect a image, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_image)
......@@ -42,25 +38,30 @@ def im_detect(detector, raw_image):
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
blobs['data'] = torch.from_numpy(blobs['data']).cuda(cfg.GPU_ID)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
batch_rois = to_array(outputs['rois'])
batch_scores = to_array(outputs['cls_prob'])
batch_deltas = to_array(outputs['bbox_pred'])
batch_rois = tensor_to_blob(outputs['rois'])
batch_scores = tensor_to_blob(outputs['cls_prob'])
batch_deltas = tensor_to_blob(outputs['bbox_pred'])
batch_boxes = bbox_transform_inv(
batch_rois[:, 1:5], batch_deltas, cfg.BBOX_REG_WEIGHTS)
scores_wide = []; boxes_wide = []
boxes=batch_rois[:, 1:5],
deltas=batch_deltas,
weights=cfg.BBOX_REG_WEIGHTS,
)
scores_wide, boxes_wide = [], []
for im_idx in range(len(ims)):
indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0]
boxes = batch_boxes[indices]
boxes /= ims_scale[im_idx]
clip_boxes(boxes, raw_image.shape)
clip_tiled_boxes(boxes, raw_image.shape)
scores_wide.append(batch_scores[indices])
boxes_wide.append(boxes)
......@@ -69,12 +70,13 @@ def im_detect(detector, raw_image):
def test_net(detector, server):
classes, num_images, num_classes = \
server.classes, server.num_images, server.num_classes
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect' : Timer(), 'misc' : Timer()}
_t = {'im_detect': Timer(), 'misc': Timer()}
for i in range(num_images):
image_id, raw_image = server.get_image()
......@@ -89,22 +91,27 @@ def test_net(detector, server):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4]
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).\
astype(np.float32, copy=False)
cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(cls_dets, cfg.TEST.NMS,
keep = soft_nms(
cls_detections, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA)
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
cls_dets = cls_dets[keep, :]
all_boxes[j][i] = cls_dets
boxes_this_image.append(cls_dets)
keep = nms(cls_detections, cfg.TEST.NMS, force_cpu=True)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_image, classes, boxes_this_image,
vis_one_image(
raw_image, classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_id))
filename=server.get_save_filename(image_id),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
......@@ -112,7 +119,8 @@ def test_net(detector, server):
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores)
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
......@@ -120,7 +128,7 @@ def test_net(detector, server):
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(i + 1, num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
......
......@@ -13,16 +13,18 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
import lib.utils.logger as logger
from lib.utils.blob import to_tensor
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module):
......@@ -31,14 +33,14 @@ class AnchorTargetLayer(torch.nn.Module):
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
self.scales, self.strides, self.ratios = \
cfg.RPN.SCALES, \
cfg.RPN.STRIDES, \
cfg.RPN.ASPECT_RATIOS
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides)))
.format(len(self.scales), len(self.strides))
)
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
......@@ -46,9 +48,9 @@ class AnchorTargetLayer(torch.nn.Module):
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size = self.strides[i]
scale = self.scales[i]
if not isinstance(scale, list): scale = [scale]
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
......@@ -59,16 +61,17 @@ class AnchorTargetLayer(torch.nn.Module):
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \
.format(num_images, len(gt_boxes_wide)))
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors = []; total_anchors = 0
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
......@@ -107,8 +110,8 @@ class AnchorTargetLayer(torch.nn.Module):
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] # height
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
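# (Clarifying note) columns 0..3 are x1, y1, x2, y2, so the two upper
# bounds compare x2 against the image width and y2 against the image
# height; a negative straddle threshold keeps all anchors (else branch).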
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
......@@ -180,16 +183,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights),
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
......@@ -9,39 +9,49 @@
#
# ------------------------------------------------------------
import numpy as np
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms
from lib.utils import logger
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform_inv, clip_boxes
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box.
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales, self.strides, self.ratios = \
cfg.RPN.SCALES, cfg.RPN.STRIDES, cfg.RPN.ASPECT_RATIOS
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides)))
.format(len(self.scales), len(self.strides))
)
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size = self.strides[i]
scale = self.scales[i]
if not isinstance(scale, list): scale = [scale]
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
......@@ -92,7 +102,8 @@ class ProposalLayer(torch.nn.Module):
# Prepare for the outputs
batch_rois = []
batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately
for ix in range(num_images):
......@@ -115,10 +126,10 @@ class ProposalLayer(torch.nn.Module):
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_boxes(proposals, ims_info[ix, :2])
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
keep = _filter_boxes(proposals, min_size * ims_info[ix, 2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
......@@ -126,7 +137,8 @@ class ProposalLayer(torch.nn.Module):
# 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: keep = keep[:post_nms_topN]
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
# Output rois blob
......@@ -151,28 +163,19 @@ class ProposalLayer(torch.nn.Module):
lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling
all_rois.append(to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else:
all_rois.append(to_tensor(rpn_rois[lv_indices]))
all_rois.append(blob_to_tensor(rpn_rois[lv_indices]))
return all_rois
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size.
"""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""Determine which FPN level each RoI in a set of RoIs should map to based
on the heuristic in the FPN paper.
"""
if len(rois) == 0: return []
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
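# Sketch of the assignment heuristic from the FPN paper (Eq. 1),
# assuming the canonical settings (scale 224 at level k0 = 4):
#   k = floor(k0 + log2(sqrt(w * h) / 224))
# clipped to [k_min, k_max]; e.g. a 112x112 RoI maps one level below
# the canonical one, since log2(112 / 224) = -1.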
......
......@@ -9,14 +9,19 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
class ProposalTargetLayer(torch.nn.Module):
......@@ -36,26 +41,19 @@ class ProposalTargetLayer(torch.nn.Module):
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
}
def _map_rois(self, inputs, fake_outputs, outputs, keys, levels):
f = lambda a, b, indices: a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(f(inputs[i], fake_outputs[key], inds))
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
outputs = dict(map(lambda a, b: (a, b), keys, [[] for key in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for key in keys]))
outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
......@@ -65,11 +63,9 @@ class ProposalTargetLayer(torch.nn.Module):
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
# Sample rois with labels & bbox targets
labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
......@@ -94,14 +90,20 @@ class ProposalTargetLayer(torch.nn.Module):
K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
_fmap_rois([batch_outputs[key] for key in keys], self.fake_outputs, outputs, keys, lvs_indices)
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return {
'rois': [to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': to_tensor(np.vstack(outputs['bbox_outside_weights'])),
'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
......@@ -115,6 +117,7 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
......@@ -131,7 +134,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
......@@ -140,10 +142,12 @@ def _compute_targets(ex_rois, gt_rois, labels):
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""Determine which FPN level each RoI in a set of RoIs should map to based
on the heuristic in the FPN paper.
"""
if len(rois) == 0: return []
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
......@@ -154,9 +158,7 @@ def _map_rois_to_fpn_levels(rois, k_min, k_max):
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Generate a random sample of RoIs comprising foreground and background
examples.
"""
"""Sample a batch of RoIs comprising foreground and background examples."""
# overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
......@@ -203,19 +205,15 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas
return labels, rois, bbox_targets, bbox_inside_weights
def _dismantle_gt_boxes(gt_boxes, num_images):
return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]] \
for ix in range(num_images)]
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
f = lambda a, b, indices: a[indices] if len(indices) > 0 else b
def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(f(inputs[i], fake_outputs[key], inds))
\ No newline at end of file
outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
......@@ -9,13 +9,17 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Import custom modules
from lib.modeling.base import Bootstarp
from lib.modeling.base import RPNDecoder
from lib.modeling.base import RetinaNetDecoder
from lib.modeling.base import conv1x1, conv3x3, bn, affine
from lib.modeling.fpn import FPN
from lib.modeling.rpn import RPN
from lib.modeling.base import affine
from lib.modeling.base import bn
from lib.modeling.base import conv1x1
from lib.modeling.base import conv3x3
from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN
from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD
......@@ -15,7 +15,9 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.modeling import conv1x1, conv3x3, bn, affine
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class WideResBlock(torch.nn.Module):
......@@ -112,8 +114,10 @@ class AirNet(torch.nn.Module):
)
self.layer1 = self.make_blocks(filters[0], blocks[0])
self.layer2 = self.make_blocks(filters[1], blocks[1], 2)
if num_stages >= 4: self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5: self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
if num_stages >= 4:
self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5:
self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
self.reset_parameters()
def reset_parameters(self):
......@@ -165,7 +169,14 @@ def airnet(num_stages):
)
return AirNet(blocks, num_stages)
def make_airnet_(): return airnet(5)
def make_airnet_3b(): return airnet(3)
def make_airnet_4b(): return airnet(4)
def make_airnet_5b(): return airnet(5)
......@@ -17,99 +17,20 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
class Bootstarp(torch.nn.Module):
"""Extended operator to process the images."""
def __init__(self):
super(Bootstarp, self).__init__()
self.dtype = cfg.MODEL.DATA_TYPE.lower()
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'ImageData',
'arguments': {
'dtype': self.dtype,
'data_format': 'NCHW',
'mean_values': cfg.PIXEL_MEANS,
}
}
def forward(self, x):
inputs, outputs = [x], [self.register_output()]
return self.run(inputs, outputs)
class RPNDecoder(torch.nn.Module):
"""Generate proposal regions from RPN."""
def __init__(self):
super(RPNDecoder, self).__init__()
self.register_op()
self.K = (cfg.FPN.ROI_MAX_LEVEL -
cfg.FPN.ROI_MIN_LEVEL + 1) \
if len(cfg.RPN.STRIDES) > 1 else 1
def register_op(self):
self.op_meta = {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RCNN',
'strides': cfg.RPN.STRIDES,
'ratios': [float(e) for e in cfg.RPN.ASPECT_RATIOS],
'scales': [float(e) for e in cfg.RPN.SCALES],
'pre_nms_top_n': cfg.TEST.RPN_PRE_NMS_TOP_N,
'post_nms_top_n': cfg.TEST.RPN_POST_NMS_TOP_N,
'nms_thresh': cfg.TEST.RPN_NMS_THRESH,
'min_size': cfg.TEST.RPN_MIN_SIZE,
'min_level': cfg.FPN.ROI_MIN_LEVEL,
'max_level': cfg.FPN.ROI_MAX_LEVEL,
'canonical_scale': cfg.FPN.ROI_CANONICAL_SCALE,
'canonical_level': cfg.FPN.ROI_CANONICAL_LEVEL,
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)]
outputs = [self.register_output() for _ in range(self.K)]
outputs = self.run(inputs, outputs)
return outputs if isinstance(outputs, list) else [outputs]
class RetinaNetDecoder(torch.nn.Module):
"""Generate proposal regions from retinanet."""
def __init__(self):
super(RetinaNetDecoder, self).__init__()
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
self.strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
self.scales = [cfg.RETINANET.ANCHOR_SCALE *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RETINANET',
'strides': self.strides,
'scales': self.scales,
'ratios': [float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
'score_thresh': cfg.TEST.SCORE_THRESH,
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)]
outputs = [self.register_output()]
return self.run(inputs, outputs)
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def conv1x1(dim_in, dim_out, stride=1, bias=False):
......@@ -133,18 +54,3 @@ def conv3x3(dim_in, dim_out, stride=1, bias=False):
padding=1,
bias=bias,
)
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
\ No newline at end of file
......@@ -13,22 +13,19 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import importlib
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.utils.logger import is_root
from lib.modeling import FPN
from lib.modeling import RPN
from lib.modeling import FastRCNN
from lib.modeling import RetinaNet
from lib.modeling import SSD
from lib.modeling.factory import get_body_func
from lib.modeling import (
Bootstarp,
FPN,
RPN,
FastRCNN,
RetinaNet,
SSD,
)
from lib.ops.modules import Bootstrap
from lib.utils.logger import is_root
class Detector(torch.nn.Module):
......@@ -47,7 +44,7 @@ class Detector(torch.nn.Module):
# + Data Loader
self.data_layer = importlib.import_module(
'lib.{}'.format(model)).DataLayer
self.bootstarp = Bootstarp()
self.bootstrap = Bootstrap()
# + Feature Extractor
self.body = get_body_func(body)()
......@@ -84,8 +81,11 @@ class Detector(torch.nn.Module):
The path of the weights file.
"""
self.load_state_dict(torch.load(weights),
strict=False, verbose=is_root())
self.load_state_dict(
torch.load(weights),
strict=False,
verbose=is_root(),
)
def forward(self, inputs=None):
"""Compute the detection outputs.
......@@ -107,9 +107,9 @@ class Detector(torch.nn.Module):
# 1. Extract features
# Process the data:
# 1) NHWC => NCHW
# 2) Uint8 => Float32 or Float16
# 2) uint8 => float32 or float16
# 3) Mean subtraction
image_data = self.bootstarp(inputs['data'])
image_data = self.bootstrap(inputs['data'])
features = self.body(image_data)
# 2. Apply the FPN to enhance features if necessary
......@@ -117,7 +117,7 @@ class Detector(torch.nn.Module):
features = self.fpn(features)
# 3. Collect detection outputs
outputs = OrderedDict()
outputs = collections.OrderedDict()
# 3.1 Feature -> RPN -> Fast R-CNN
if hasattr(self, 'rpn'):
......
......@@ -13,27 +13,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import importlib
from collections import defaultdict
_STORE = defaultdict(dict)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
except ImportError as e:
raise ValueError('Can not import module from: ' + module_name)
_STORE = collections.defaultdict(dict)
###########################################
......@@ -59,6 +43,23 @@ for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
except ImportError as e:
raise ValueError('Cannot import module from: ' + module_name)
def get_body_func(name):
return get_template_func(
name, _STORE['BODY'], 'Body')
......@@ -13,11 +13,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling import RPNDecoder
from lib.ops.modules import RPNDecoder
class FastRCNN(torch.nn.Module):
......@@ -50,11 +50,11 @@ class FastRCNN(torch.nn.Module):
self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = {
'RoIPool': torch.roi_pool,
'RoIAlign': torch.roi_align,
'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1.)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1., reduction='batch_size')
# Compute spatial scales for multiple strides
roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
......@@ -66,13 +66,16 @@ class FastRCNN(torch.nn.Module):
torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters():
if 'bias' in name: torch.nn.init.constant_(p, 0)
if 'bias' in name:
torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func(
feature, rois,
pooled_h=cfg.FRCNN.ROI_XFORM_RESOLUTION,
pooled_w=cfg.FRCNN.ROI_XFORM_RESOLUTION,
output_size=(
cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION,
),
spatial_scale=spatial_scale,
)
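# A brief reminder, not part of the original code: RoIPool quantizes
# each RoI to the feature grid before max pooling, while RoIAlign
# samples the feature map bilinearly at exact sub-pixel locations
# (see the Mask R-CNN paper).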
......@@ -127,14 +130,14 @@ class FastRCNN(torch.nn.Module):
# Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float()
outputs = OrderedDict({
outputs = collections.OrderedDict({
'bbox_pred':
self.bbox_pred(rcnn_output).float(),
})
if self.training:
# Compute rcnn losses
outputs.update(OrderedDict({
outputs.update(collections.OrderedDict({
'cls_loss': self.cls_loss(
cls_score,
self.rcnn_data['labels'],
......
......@@ -16,7 +16,8 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3
from lib.modeling import conv1x1
from lib.modeling import conv3x3
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
......@@ -48,49 +49,43 @@ class FPN(torch.nn.Module):
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
# Fix the gain for [-127, 127]
a=1,
a=1, # Fix the gain for [-127, 127]
) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0)
def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL- min_lvl](fpn_input)]
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Apply MaxPool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1]))
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.nn_resize(
upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
def apply_on_retinanet(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Add extra convolutions for higher features
extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl: extra_input = self.relu(outputs[-1])
if i != max_lvl:
extra_input = self.relu(outputs[-1])
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.nn_resize(
upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
def forward(self, features):
......
......@@ -20,12 +20,20 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3, affine
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class BasicBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out, stride=1,
downsample=None, dropblock=None):
def __init__(
self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out)
......@@ -65,8 +73,14 @@ class Bottleneck(torch.nn.Module):
contraction = cfg.RESNET.NUM_GROUPS \
* cfg.RESNET.GROUP_WIDTH / 256.0
def __init__(self, dim_in, dim_out, stride=1,
downsample=None, dropblock=None):
def __init__(
self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction)
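# Illustrative numbers, not taken from the config: NUM_GROUPS = 1 and
# GROUP_WIDTH = 64 give contraction = 0.25, i.e. the classic ResNet
# bottleneck that narrows dim_out by 4x; ResNeXt-style groups widen it.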
self.conv1 = conv1x1(dim_in, dim)
......@@ -128,11 +142,17 @@ class ResNet(torch.nn.Module):
ceil_mode=True,
)
self.drop3 = torch.nn.DropBlock2d(
7, 0.9, alpha=0.25, decrement=cfg.DROPBLOCK.DECREMENT) \
if cfg.DROPBLOCK.DROP_ON else None
kp=0.9,
block_size=7,
alpha=0.25,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.drop4 = torch.nn.DropBlock2d(
7, 0.9, alpha=1., decrement=cfg.DROPBLOCK.DECREMENT) \
if cfg.DROPBLOCK.DROP_ON else None
kp=0.9,
block_size=7,
alpha=1.00,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3)
......@@ -145,7 +165,8 @@ class ResNet(torch.nn.Module):
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_(
m.weight,
nonlinearity='relu')
nonlinearity='relu',
)
# Stop the gradients if necessary
def freeze_func(m):
......@@ -178,25 +199,31 @@ class ResNet(torch.nn.Module):
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
outputs = [x]
outputs += [self.layer1(outputs[-1])]
outputs += [self.layer2(outputs[-1])]
outputs += [self.layer3(outputs[-1])]
outputs += [self.layer4(outputs[-1])]
return outputs
def resnet(depth):
if depth == 18: units = [2, 2, 2, 2]
elif depth == 34: units = [3, 4, 6, 3]
elif depth == 50: units = [3, 4, 6, 3]
elif depth == 101: units = [3, 4, 23, 3]
elif depth == 152: units = [3, 8, 36, 3]
elif depth == 200: units = [3, 24, 36, 3]
elif depth == 269: units = [3, 30, 48, 8]
else: raise ValueError('Unsupported depth: %d' % depth)
if depth == 18:
units = [2, 2, 2, 2]
elif depth == 34:
units = [3, 4, 6, 3]
elif depth == 50:
units = [3, 4, 6, 3]
elif depth == 101:
units = [3, 4, 23, 3]
elif depth == 152:
units = [3, 8, 36, 3]
elif depth == 200:
units = [3, 24, 36, 3]
elif depth == 269:
units = [3, 30, 48, 8]
else:
raise ValueError('Unsupported depth: %d' % depth)
block = Bottleneck if depth >= 50 else BasicBlock
filters = [64, 256, 512, 1024, 2048] \
if depth >= 50 else [64, 64, 128, 256, 512]
......@@ -204,7 +231,15 @@ def resnet(depth):
def make_resnet_18(): return resnet(18)
def make_resnet_34(): return resnet(34)
def make_resnet_50(): return resnet(50)
def make_resnet_101(): return resnet(101)
def make_resnet_152(): return resnet(152)
......@@ -13,12 +13,13 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import math
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling import conv3x3, RetinaNetDecoder
from lib.modeling import conv3x3
from lib.ops.modules import RetinaNetDecoder
from lib.retinanet import AnchorTargetLayer
......@@ -32,10 +33,12 @@ class RetinaNet(torch.nn.Module):
self.cls_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS))
for _ in range(cfg.RETINANET.NUM_CONVS)
)
self.bbox_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS))
for _ in range(cfg.RETINANET.NUM_CONVS)
)
# Packed as [C, A] not [A, C]
self.C = cfg.MODEL.NUM_CLASSES - 1
A = len(cfg.RETINANET.ASPECT_RATIOS) * \
......@@ -53,8 +56,11 @@ class RetinaNet(torch.nn.Module):
self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1. / 9.)
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
)
self.bbox_loss = torch.nn.SmoothL1Loss(
beta=1. / 9., reduction='batch_size',
)
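# Sketch of the standard smooth L1 definition assumed here:
#   loss(x) = 0.5 * x ** 2 / beta  if |x| < beta, else |x| - 0.5 * beta
# beta = 1/9 follows the RetinaNet/Detectron setting and keeps the loss
# close to plain L1 except very near zero.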
self.reset_parameters()
def reset_parameters(self):
......@@ -127,7 +133,7 @@ class RetinaNet(torch.nn.Module):
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return OrderedDict({
return collections.OrderedDict({
'cls_loss':
self.cls_loss(
cls_score,
......@@ -146,7 +152,7 @@ class RetinaNet(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({'bbox_pred': bbox_pred})
outputs = collections.OrderedDict({'bbox_pred': bbox_pred})
if self.training:
outputs.update(
......
......@@ -13,11 +13,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class RPN(torch.nn.Module):
......@@ -119,7 +120,7 @@ class RPN(torch.nn.Module):
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return OrderedDict({
return collections.OrderedDict({
'rpn_cls_loss':
self.cls_loss(cls_score, self.rpn_data['labels']),
'rpn_bbox_loss':
......@@ -135,7 +136,7 @@ class RPN(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({
outputs = collections.OrderedDict({
'rpn_cls_score': cls_score,
'rpn_bbox_pred': bbox_pred,
})
......
......@@ -13,18 +13,15 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling import conv3x3
from lib.ssd import (
PriorBoxLayer,
MultiBoxMatchLayer,
HardMiningLayer,
MultiBoxTargetLayer,
)
from lib.ssd import HardMiningLayer
from lib.ssd import MultiBoxMatchLayer
from lib.ssd import MultiBoxTargetLayer
from lib.ssd import PriorBoxLayer
class SSD(torch.nn.Module):
......@@ -57,7 +54,7 @@ class SSD(torch.nn.Module):
self.hard_mining_layer = HardMiningLayer()
self.box_target_layer = MultiBoxTargetLayer()
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss()
self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size')
self.reset_parameters()
def reset_parameters(self):
......@@ -88,8 +85,9 @@ class SSD(torch.nn.Module):
.permute(0, 2, 3, 1).view(0, -1))
# Concat them if necessary
return torch.cat(cls_score_wide, dim=1).view(
0, -1, cfg.MODEL.NUM_CLASSES), \
return \
torch.cat(cls_score_wide, dim=1) \
.view(0, -1, cfg.MODEL.NUM_CLASSES), \
torch.cat(bbox_pred_wide, dim=1).view(0, -1, 4)
def compute_losses(
......@@ -138,7 +136,7 @@ class SSD(torch.nn.Module):
gt_boxes=gt_boxes,
)
)
return OrderedDict({
return collections.OrderedDict({
# A compensating factor of 4.0 is used,
# as we normalize both the pos and neg samples
'cls_loss':
......@@ -160,7 +158,7 @@ class SSD(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({
outputs = collections.OrderedDict({
'prior_boxes': prior_boxes,
'bbox_pred': bbox_pred,
})
......
......@@ -16,7 +16,8 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class VGG(torch.nn.Module):
......@@ -35,16 +36,22 @@ class VGG(torch.nn.Module):
dim_in = 3 if i == 0 else filter_list[i - 1]
for j in range(self.units[i]):
self.__setattr__(
'{}_{}'.format(conv_name, j + 1),
conv3x3(dim_in, filter_list[i], bias=True))
if j == 0: dim_in = filter_list[i]
'{}_{}'
.format(conv_name, j + 1),
conv3x3(dim_in, filter_list[i], bias=True),
)
if j == 0:
dim_in = filter_list[i]
if reduced:
# Empirically, the L2Norm is redundant,
# so we just keep a trainable scale
self.conv4_3_norm = torch.nn.Affine(filter_list[3], bias=False)
self.conv4_3_norm.weight.zero_() # Zero-Init
self.fc6 = torch.nn.Conv2d(filter_list[-1], 1024,
kernel_size=3, stride=1, padding=6, dilation=6)
self.fc6 = torch.nn.Conv2d(
filter_list[-1], 1024,
kernel_size=3, padding=6,
stride=1, dilation=6,
)
self.fc7 = conv1x1(1024, 1024, bias=True)
self.feature_dims = [filter_list[-2], 1024]
if extra_arch is not None:
......@@ -54,15 +61,23 @@ class VGG(torch.nn.Module):
for i in range(len(strides)):
conv_name = 'conv{}'.format(i + 6)
dim_in = 1024 if i == 0 else filter_list[i - 1] * 2
self.__setattr__('{}_1'.format(conv_name),
conv1x1(dim_in, filter_list[i], bias=True))
self.__setattr__(
'{}_1'.format(conv_name),
conv1x1(dim_in, filter_list[i], bias=True),
)
if strides[i] == 2:
self.__setattr__('{}_2'.format(conv_name),
conv3x3(filter_list[i], filter_list[i] * 2, 2, bias=True))
self.__setattr__(
'{}_2'.format(conv_name),
conv3x3(filter_list[i], filter_list[i] * 2, 2, bias=True),
)
else:
self.__setattr__('{}_2'.format(conv_name),
torch.nn.Conv2d(filter_list[i], filter_list[i] * 2,
kernel_size=kps[0], padding=kps[1], stride=kps[2]))
self.__setattr__(
'{}_2'.format(conv_name),
torch.nn.Conv2d(
filter_list[i], filter_list[i] * 2,
kernel_size=kps[0], padding=kps[1], stride=kps[2]
),
)
self.reset_parameters()
def reset_parameters(self):
......@@ -88,8 +103,9 @@ class VGG(torch.nn.Module):
for i in range(cfg.MODEL.FREEZE_AT, 0, -1):
conv_name = 'conv{}'.format(i)
for j in range(self.units[i - 1]):
self.__getattr__('{}_{}'.format(
conv_name, j + 1)).apply(freeze_func)
self.__getattr__(
'{}_{}'.format(conv_name, j + 1)
).apply(freeze_func)
def forward(self, x):
outputs = []
......@@ -101,8 +117,10 @@ class VGG(torch.nn.Module):
'{}_{}'.format(conv_name, j + 1))(x))
if self.reduced and i == 3:
outputs.append(self.conv4_3_norm(x))
if i < 4: x = self.maxpool(x)
else: x = self.s1pool(x) if self.reduced else x
if i < 4:
x = self.maxpool(x)
else:
x = self.s1pool(x) if self.reduced else x
# Internal FC layers and Extra Conv Layers
if self.reduced:
......@@ -145,4 +163,6 @@ def make_vgg_16_reduced(scale=300):
def make_vgg_16_reduced_300(): return make_vgg_16_reduced(300)
def make_vgg_16_reduced_512(): return make_vgg_16_reduced(512)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function
from lib.core.config import cfg
import lib.utils.logger as logger
from lib.utils import logger
try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms
......@@ -33,10 +33,12 @@ except ImportError as e:
def nms(detections, thresh, force_cpu=False):
"""Perform either CPU or GPU Hard-NMS."""
if detections.shape[0] == 0: return []
if detections.shape[0] == 0:
return []
if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else: return cpu_nms(detections, thresh)
else:
return cpu_nms(detections, thresh)
def soft_nms(
......@@ -47,7 +49,8 @@ def soft_nms(
score_thresh=0.001,
):
"""Perform CPU Soft-NMS."""
if detections.shape[0] == 0: return []
if detections.shape[0] == 0:
return []
methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods:
logger.fatal('Unknown soft nms method: {}'.format(method))
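# For reference (following the Soft-NMS paper; illustrative only), the
# three methods decay a neighbor's score s by its IoU u with a kept box:
#   hard:     s = 0               if u > thresh
#   linear:   s = s * (1 - u)     if u > thresh
#   gaussian: s = s * exp(-u * u / sigma)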
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.autograd import function
from lib.ops import functions
def decode_retinanet(
features,
cls_prob,
bbox_pred,
ims_info,
strides,
ratios,
scales,
pre_nms_top_n,
score_thresh,
):
return function.get(
functions.RetinaNetDecoder,
cls_prob.device,
strides=strides,
ratios=ratios,
scales=scales,
pre_nms_top_n=pre_nms_top_n,
score_thresh=score_thresh,
).apply(features, cls_prob, bbox_pred, ims_info)
def decode_rpn(
features,
cls_prob,
bbox_pred,
ims_info,
num_outputs,
strides,
ratios,
scales,
pre_nms_top_n,
post_nms_top_n,
nms_thresh,
min_size,
min_level,
max_level,
canonical_scale,
canonical_level,
):
return function.get(
functions.RPNDecoder,
cls_prob.device,
K=num_outputs,
strides=strides,
ratios=ratios,
scales=scales,
pre_nms_top_n=pre_nms_top_n,
post_nms_top_n=post_nms_top_n,
nms_thresh=nms_thresh,
min_size=min_size,
min_level=min_level,
max_level=max_level,
canonical_scale=canonical_scale,
canonical_level=canonical_level,
).apply(features, cls_prob, bbox_pred, ims_info)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.autograd import function
class RetinaNetDecoder(function.Function):
def __init__(self, key, dev, **kwargs):
super(RetinaNetDecoder, self).__init__(key, dev, **kwargs)
self.args = kwargs
def register_operator(self):
return {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RETINANET',
'strides': self.args['strides'],
'ratios': self.args['ratios'],
'scales': self.args['scales'],
'pre_nms_top_n': self.args['pre_nms_top_n'],
'score_thresh': self.args['score_thresh'],
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, ims_info]
self._unify_devices(inputs[:-1]) # Skip <ims_info>
return self.run(inputs, [self.alloc()], unify_devices=False)
class RPNDecoder(function.Function):
def __init__(self, key, dev, **kwargs):
super(RPNDecoder, self).__init__(key, dev, **kwargs)
self.args = kwargs
def register_operator(self):
return {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RCNN',
'strides': self.args['strides'],
'ratios': self.args['ratios'],
'scales': self.args['scales'],
'pre_nms_top_n': self.args['pre_nms_top_n'],
'post_nms_top_n': self.args['post_nms_top_n'],
'nms_thresh': self.args['nms_thresh'],
'min_size': self.args['min_size'],
'min_level': self.args['min_level'],
'max_level': self.args['max_level'],
'canonical_scale': self.args['canonical_scale'],
'canonical_level': self.args['canonical_level'],
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, ims_info]
self._unify_devices(inputs[:-1]) # Skip <ims_info>
outputs = [self.alloc() for _ in range(self.args['K'])]
return self.run(inputs, outputs, unify_devices=False)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.ops import functional as F
from lib.utils.blob import blob_to_tensor
class Bootstrap(torch.nn.Module):
"""Extended operator to process the images."""
def __init__(self):
super(Bootstrap, self).__init__()
self.dtype = cfg.MODEL.DATA_TYPE.lower()
self.mean_values = cfg.PIXEL_MEANS
self.dummy_buffer = torch.ones(1)
def _apply(self, fn):
fn(self.dummy_buffer)
def cpu(self):
self._device = torch.device('cpu')
def cuda(self, device=None):
self._device = torch.device('cuda', device)
def device(self):
"""Return the device of this module."""
return self.dummy_buffer.device
def forward(self, input):
cur_device = self.device()
if input._device != cur_device:
if cur_device.type == 'cpu':
input = input.cpu()
else:
input = input.cuda(cur_device.index)
return torch.vision.ops.image_data(
input, self.dtype, self.mean_values,
)
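# Assumed semantics of image_data, matching the notes in
# Detector.forward: NHWC -> NCHW transpose, cast from uint8 to
# float32/float16, and mean subtraction (a sketch, not a spec).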
class RetinaNetDecoder(torch.nn.Module):
"""Generate proposal regions from retinanet."""
def __init__(self):
super(RetinaNetDecoder, self).__init__()
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
self.strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
self.scales = [cfg.RETINANET.ANCHOR_SCALE *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
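# Illustrative numbers, not from the config: ANCHOR_SCALE = 4 and
# SCALES_PER_OCTAVE = 3 give per-level scales 4 * 2 ** (i / 3) for
# i in {0, 1, 2}, i.e. about [4.00, 5.04, 6.35] times each stride.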
def register_operator(self):
return {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RETINANET',
'strides': self.strides,
'scales': self.scales,
'ratios': [float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
'score_thresh': cfg.TEST.SCORE_THRESH,
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
return F.decode_retinanet(
features=features,
cls_prob=cls_prob,
bbox_pred=bbox_pred,
ims_info=blob_to_tensor(ims_info, enforce_cpu=True),
strides=self.strides,
ratios=[float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
scales=self.scales,
pre_nms_top_n=cfg.RETINANET.PRE_NMS_TOP_N,
score_thresh=cfg.TEST.SCORE_THRESH,
)
class RPNDecoder(torch.nn.Module):
"""Generate proposal regions from RPN."""
def __init__(self):
super(RPNDecoder, self).__init__()
self.K = (cfg.FPN.ROI_MAX_LEVEL -
cfg.FPN.ROI_MIN_LEVEL + 1) \
if len(cfg.RPN.STRIDES) > 1 else 1
def forward(self, features, cls_prob, bbox_pred, ims_info):
outputs = F.decode_rpn(
features=features,
cls_prob=cls_prob,
bbox_pred=bbox_pred,
ims_info=blob_to_tensor(ims_info, enforce_cpu=True),
num_outputs=self.K,
strides=cfg.RPN.STRIDES,
ratios=[float(e) for e in cfg.RPN.ASPECT_RATIOS],
scales=[float(e) for e in cfg.RPN.SCALES],
pre_nms_top_n=cfg.TEST.RPN_PRE_NMS_TOP_N,
post_nms_top_n=cfg.TEST.RPN_POST_NMS_TOP_N,
nms_thresh=cfg.TEST.RPN_NMS_THRESH,
min_size=cfg.TEST.RPN_MIN_SIZE,
min_level=cfg.FPN.ROI_MIN_LEVEL,
max_level=cfg.FPN.ROI_MAX_LEVEL,
canonical_scale=cfg.FPN.ROI_CANONICAL_SCALE,
canonical_level=cfg.FPN.ROI_CANONICAL_LEVEL,
)
return [outputs] if self.K == 1 else outputs
......@@ -15,4 +15,3 @@ from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.retinanet.layers.anchor_target_layer import AnchorTargetLayer
from lib.retinanet.layers.proposal_layer import ProposalLayer
\ No newline at end of file
......@@ -13,15 +13,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
class AnchorTargetLayer(torch.nn.Module):
......@@ -35,28 +36,32 @@ class AnchorTargetLayer(torch.nn.Module):
anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = []
for stride in self.strides:
sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.base_anchors.append(generate_anchors_v2(
stride=stride, ratios=self.ratios, sizes=sizes))
self.base_anchors.append(
generate_anchors_v2(
stride=stride,
ratios=self.ratios,
sizes=sizes,
))
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \
.format(num_images, len(gt_boxes_wide)))
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors = []; total_anchors = 0
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
......@@ -101,7 +106,8 @@ class AnchorTargetLayer(torch.nn.Module):
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float))
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
......@@ -125,10 +131,10 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4])
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[fg_inds, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1.0)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1)
labels_wide[ix, inds_inside] = labels
bbox_targets_wide[ix, inds_inside] = bbox_targets
......@@ -141,16 +147,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights),
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.bbox_transform import bbox_transform_inv
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box.
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = []
for stride in self.strides:
sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.base_anchors.append(generate_anchors_v2(
stride=stride, ratios=self.ratios, sizes=sizes))
def forward(self, features, cls_prob, bbox_pred, ims_info):
# Get resources
num_images = ims_info.shape[0]
cls_prob, bbox_pred = cls_prob.numpy(True), bbox_pred.numpy(True)
lvl_info = [features[i].shape[-2:] for i in range(len(self.strides))]
if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images:
logger.fatal('Incorrect num of images: {}'.format(num_images))
# Prepare for the outputs
batch_probs = cls_prob
batch_deltas = bbox_pred.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
batch_detections = []
# Extract Detections separately
for ix in range(num_images):
im_scale = ims_info[ix, 2]
if cfg.RETINANET.SOFTMAX:
P = batch_probs[ix, 1:, :]
else:
P = batch_probs[ix]  # [num_classes - 1, n]
D = batch_deltas[ix] # [n, 4]
anchor_pos = 0
for lvl, (H, W) in enumerate(lvl_info):
A, K = self.base_anchors[lvl].shape[0], H * W
num_anchors = A * K
prob = P[:, anchor_pos : anchor_pos + num_anchors]
deltas = D[anchor_pos : anchor_pos + num_anchors]
anchor_pos += num_anchors
prob_ravel = prob.ravel()
candidate_inds = np.where(prob_ravel > cfg.TEST.SCORE_THRESH)[0]
if len(candidate_inds) == 0:
continue
pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
inds = np.argpartition(
prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
inds = candidate_inds[inds]
prob_4d = prob.reshape((prob.shape[0], A, H, W))
inds_2d = np.array(np.unravel_index(inds, prob.shape)).transpose()
inds_4d = np.array(np.unravel_index(inds, prob_4d.shape)).transpose()
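# (Clarifying note) the flat top-k indices are decoded twice: against
# prob [C, A*K] to get (class, anchor_id), and against prob_4d
# [C, A, H, W] to get (class, a, y, x). E.g., assuming shape (2, 3, 4),
# np.unravel_index(7, (2, 3, 4)) -> (0, 1, 3).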
classes, anchor_ids = inds_2d[:, 0], inds_2d[:, 1]
a, y, x = inds_4d[:, 1], inds_4d[:, 2], inds_4d[:, 3]
scores = prob[classes, anchor_ids]
deltas = deltas[anchor_ids]
anchors = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
anchors = (anchors * self.strides[lvl]) + self.base_anchors[lvl][a, :]
pred_boxes = bbox_transform_inv(anchors, deltas)
pred_boxes /= im_scale
# {im_idx, x1, y1, x2, y2, score, cls}
detections = np.zeros((pred_boxes.shape[0], 7), dtype=np.float32)
detections[:, 0], detections[:, 1:5] = ix, pred_boxes
detections[:, 5], detections[:, 6] = scores, classes + 1
batch_detections.append(detections)
# Merge Detections into a blob
batch_detections = np.vstack(batch_detections) \
if len(batch_detections) > 0 else \
np.zeros((1, 7), dtype=np.float32)
return batch_detections
\ No newline at end of file
......@@ -13,20 +13,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
import cPickle
except:
import pickle as cPickle
import numpy as np
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.image import scale_image
from lib.utils.bbox_transform import clip_boxes
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer
from lib.utils.blob import im_list_to_blob
from lib.utils.vis import vis_one_image
......@@ -39,72 +35,65 @@ def im_detect(detector, raw_image):
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
blobs['data'] = torch.from_numpy(blobs['data']).cuda(cfg.GPU_ID)
for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
results = outputs['detections']
detections_wide = []
for im_idx in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == im_idx)[0]
detections = results[indices, 1:]
detections[:, :4] = clip_boxes(detections[:, :4], raw_image.shape)
detections_wide.append(detections)
return np.vstack(detections_wide) \
if len(detections_wide) > 1 else detections_wide[0]
# Unpack results
return tensor_to_blob(outputs['detections'])[:, 1:]
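# The rows of outputs['detections'] follow the layout produced by the
# proposal layer above: [im_idx, x1, y1, x2, y2, score, cls]; slicing
# off column 0 leaves [x1, y1, x2, y2, score, cls] per detection.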
def ims_detect(net, raw_images):
"""Detect images, with single or multiple scales.
"""
def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_images[0])
num_scales = len(ims_scale)
ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext; ims_scale += ims_scale_ext
ims += ims_ext
ims_scale += ims_scale_ext
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[2:4]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
net.forward(**blobs)()
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
results = net.blobs['detections'].data.get_value()
# Unpack results
results = tensor_to_blob(outputs['detections'])
detections_wide = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)):
j = i % len(ims_shape)
indices = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:]
detections[:, :4] = clip_boxes(detections[:, :4], ims_shape[j])
detections_wide[j].append(detections)
detections_wide[i // num_scales].append(detections)
for j in range(len(ims_shape)):
detections_wide[j] = np.vstack(detections_wide[j]) \
if len(detections_wide[j]) > 1 else detections_wide[j][0]
for i in range(len(ims_shape)):
detections_wide[i] = np.vstack(detections_wide[i]) \
if len(detections_wide[i]) > 1 else detections_wide[i][0]
return detections_wide
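# A sketch of the index layout assumed above (hypothetical case of
# 2 raw images and 2 test scales): `ims` is ordered image-major,
#   [im0@s0, im0@s1, im1@s0, im1@s1]
# so a result row tagged with blob index i belongs to raw image
# i // num_scales, e.g. [i // 2 for i in range(4)] == [0, 0, 1, 1].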
def test_net(net, server):
classes, num_images, num_classes = \
server.classes, server.num_images, server.num_classes
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect' : Timer(), 'misc' : Timer()}
_t = {'im_detect': Timer(), 'misc': Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
......@@ -134,30 +123,46 @@ def test_net(net, server):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4]
cls_scores = detections[cls_indices, 4]
cls_dets = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])).\
astype(np.float32, copy=False)
cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(cls_dets, cfg.TEST.NMS,
keep = soft_nms(
cls_detections,
cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA)
else: keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
cls_dets = cls_dets[keep, :]
all_boxes[j][i] = cls_dets
boxes_this_image.append(cls_dets)
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(
cls_detections,
cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_images[item_idx], classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_ids[item_idx]))
vis_one_image(
raw_images[item_idx],
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores)
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
......@@ -165,7 +170,7 @@ def test_net(net, server):
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
......
......@@ -14,7 +14,7 @@ from __future__ import division
from __future__ import print_function
from lib.ssd.layers.data_layer import DataLayer
from lib.ssd.layers.prior_box_layer import PriorBoxLayer
from lib.ssd.layers.multibox_match_layer import MultiBoxMatchLayer
from lib.ssd.layers.hard_mining_layer import HardMiningLayer
from lib.ssd.layers.multibox_target_layer import MultiBoxTargetLayer
\ No newline at end of file
from lib.ssd.layers.multibox_layer import MultiBoxMatchLayer
from lib.ssd.layers.multibox_layer import MultiBoxTargetLayer
from lib.ssd.layers.priorbox_layer import PriorBoxLayer
......@@ -13,32 +13,39 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from multiprocessing import Process
from lib.core.config import cfg
class BlobFetcher(Process):
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self.Q_in = self.Q_out = None
self._img_blob_size = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
self.q_in = self.q_out = None
self.daemon = True
def get(self):
num_images = cfg.TRAIN.IMS_PER_BATCH
target_h = cfg.SSD.RESIZE.HEIGHT; target_w = cfg.SSD.RESIZE.WIDTH
ims_blob = np.zeros(shape=(num_images, target_h, target_w, 3), dtype=np.uint8)
gt_boxes_wide = []
for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, gt_boxes = self.Q_in.get()
ims_blob[ix, :, :, :] = im
# Encode boxes by adding the idx of images
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), dtype=np.float32)
im_boxes[:, 0:gt_boxes.shape[1]] = gt_boxes
im_boxes[:, -1] = ix
gt_boxes_wide.append(im_boxes)
return {'data': ims_blob, 'gt_boxes': np.concatenate(gt_boxes_wide, axis=0)}
img_blob, boxes_blob = np.zeros(self._img_blob_size, 'uint8'), []
for i in range(cfg.TRAIN.IMS_PER_BATCH):
img_blob[i], gt_boxes = self.q_in.get()
# Pack the boxes by adding the index of images
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), np.float32)
boxes[:, :gt_boxes.shape[1]] = gt_boxes
boxes[:, -1] = i
boxes_blob.append(boxes)
return {
'data': img_blob,
'gt_boxes': np.concatenate(boxes_blob, 0),
}
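# A sketch of the packed blob with hypothetical shapes: with
# IMS_PER_BATCH=2 and 3+1 ground-truth boxes, 'gt_boxes' is a (4, 6)
# float32 array whose rows are [x1, y1, x2, y2, cls, image_index].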
def run(self):
while True: self.Q_out.put(self.get())
\ No newline at end of file
while True:
self.q_out.put(self.get())
......@@ -13,15 +13,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Queue
import time
import dragon
import pprint
from multiprocessing import Queue
import dragon.core.mpi as mpi
import lib.utils.logger as logger
from lib.faster_rcnn.data.data_reader import DataReader
from lib.ssd.data.data_transformer import DataTransformer
from lib.ssd.data.blob_fetcher import BlobFetcher
from lib.utils import logger
class DataBatch(object):
......@@ -52,19 +53,20 @@ class DataBatch(object):
super(DataBatch, self).__init__()
# Init mpi
global_rank, local_rank, group_size = 0, 0, 1
if mpi.Is_Init():
idx, group = mpi.AllowParallel()
if idx != -1: # DataParallel
global_rank = mpi.Rank()
if dragon.mpi.is_init():
group = dragon.mpi.is_parallel()
if group is not None: # DataParallel
global_rank = dragon.mpi.rank()
group_size = len(group)
for i, node in enumerate(group):
if global_rank == node: local_rank = i
if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 32)
self._num_readers = kwargs.get( 'num_readers', 1)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
......@@ -84,7 +86,7 @@ class DataBatch(object):
self._readers = []
for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].Q_out = self.Q1
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
......@@ -101,8 +103,8 @@ class DataBatch(object):
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer.Q_in = self.Q1
transformer.Q_out = self.Q2
transformer.q_in = self.Q1
transformer.q_out = self.Q2
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
......@@ -111,14 +113,16 @@ class DataBatch(object):
self._fetchers = []
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.Q_in = self.Q2
fetcher.Q_out = self.Q3
fetcher.q_in = self.Q2
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Prevent to echo multiple nodes
if local_rank == 0: self.echo()
if local_rank == 0:
self.echo()
def cleanup():
def terminate(processes):
for process in processes:
......@@ -130,6 +134,7 @@ class DataBatch(object):
logger.info('Terminating DataTransformer ......')
terminate(self._readers)
logger.info('Terminating DataReader......')
import atexit
atexit.register(cleanup)
......@@ -145,13 +150,7 @@ class DataBatch(object):
return self.Q3.get()
def echo(self):
"""Print I/O Information.
Returns
-------
None
"""
"""Print I/O Information."""
print('---------------------------------------------------------')
print('BatchFetcher({} Threads), Using config:'.format(
self._num_readers + self._num_transformers + self._num_fetchers))
......
......@@ -14,34 +14,34 @@ from __future__ import division
from __future__ import print_function
import cv2
import multiprocessing
import numpy as np
import numpy.random as npr
from multiprocessing import Process
from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.ssd.data.preprocessing import *
import lib.utils.logger as logger
from lib.ssd.data import transforms
from lib.utils import logger
class DataTransformer(Process):
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._distorter = Distortor()
self._expander = Expander()
self._sampler = Sampler(cfg.SSD.SAMPLERS)
self._resizer = Resizer()
self._rng_seed = cfg.RNG_SEED
self._mirror = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF
self._classes = kwargs.get('classes', ('__background__',))
self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._queues = []
self.Q_in = self.Q_out = None
self._image_aug = transforms.Compose(
transforms.Distort(), # Color augmentation
transforms.Expand(), # Expand and padding
transforms.Sample(), # Sample a patch randomly
transforms.Resize(), # Resize to a fixed scale
)
self.q_in = self.q_out = None
self.daemon = True
def make_roidb(self, ann_datum, flip=False):
def make_roi_dict(self, ann_datum, flip=False):
annotations = ann_datum.annotation
n_objects = 0
if not self._use_diff:
......@@ -49,7 +49,7 @@ class DataTransformer(Process):
if not ann.difficult: n_objects += 1
else: n_objects = len(annotations)
roidb = {
roi_dict = {
'width': ann_datum.datum.width,
'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32),
......@@ -57,75 +57,82 @@ class DataTransformer(Process):
'normalized_boxes': np.zeros((n_objects, 4), dtype=np.float32),
}
ix = 0
rec_idx = 0
for ann in annotations:
if not self._use_diff and ann.difficult: continue
roidb['boxes'][ix, :] = [
max(0, ann.x1), max(0, ann.y1),
if not self._use_diff and ann.difficult:
continue
roi_dict['boxes'][rec_idx, :] = [
max(0, ann.x1),
max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1)]
roidb['gt_classes'][ix] = self._class_to_ind[ann.name]
ix += 1
min(ann.y2, ann_datum.datum.height - 1),
]
roi_dict['gt_classes'][rec_idx] = \
self._class_to_ind[ann.name]
rec_idx += 1
if flip: roidb['boxes'] = _flip_boxes(roidb['boxes'], roidb['width'])
roidb['normalized_boxes'][:, 0::2] = roidb['boxes'][:, 0::2] / float(roidb['width'])
roidb['normalized_boxes'][:, 1::2] = roidb['boxes'][:, 1::2] / float(roidb['height'])
if flip:
roi_dict['boxes'] = _flip_boxes(
roi_dict['boxes'], roi_dict['width'])
return roidb
roi_dict['boxes'][:, 0::2] /= roi_dict['width']
roi_dict['boxes'][:, 1::2] /= roi_dict['height']
return roi_dict
def get(self, serialized):
ann_datum = pb.AnnotatedDatum()
ann_datum.ParseFromString(serialized)
im_datum = ann_datum.datum
im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True: im = cv2.imdecode(im, -1)
else: im = im.reshape((im_datum.height, im_datum.width, im_datum.channels))
img_datum = ann_datum.datum
img = np.fromstring(img_datum.data, np.uint8)
if img_datum.encoded is True:
img = cv2.imdecode(img, -1)
else:
h, w = img_datum.height, img_datum.width
img = img.reshape((h, w, img_datum.channels))
# Flip
flip = False
if self._mirror:
if npr.randint(0, 2) > 0:
im = im[:, ::-1, :]
if np.random.randint(0, 2) > 0:
img = img[:, ::-1, :]
flip = True
# Datum -> RoIDB
roidb = self.make_roidb(ann_datum, flip)
roi_dict = self.make_roi_dict(ann_datum, flip)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(roidb['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, 0:4] = roidb['normalized_boxes']
gt_boxes[:, 4] = roidb['gt_classes']
gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), 'float32')
gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
# Distort => Expand => Sample => Resize
im = self._distorter.distort_image(im)
im, gt_boxes = self._expander.expand_image(im, gt_boxes)
im, gt_boxes = self._sampler.sample_image(im, gt_boxes)
im = self._resizer.resize_image(im)
img, gt_boxes = self._image_aug(img, gt_boxes)
# Modify gt boxes to the blob scale
# Restore to the blob scale
gt_boxes[:, 0] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 1] *= cfg.SSD.RESIZE.HEIGHT
gt_boxes[:, 2] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 3] *= cfg.SSD.RESIZE.HEIGHT
return im, gt_boxes
return img, gt_boxes
def run(self):
npr.seed(self._rng_seed)
np.random.seed(self._rng_seed)
while True:
serialized = self.Q_in.get()
serialized = self.q_in.get()
im, gt_boxes = self.get(serialized)
if len(gt_boxes) < 1: continue
self.Q_out.put((im, gt_boxes))
if len(gt_boxes) < 1:
continue
self.q_out.put((im, gt_boxes))
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
oldx1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - oldx2 - 1
flip_boxes[:, 2] = width - oldx1 - 1
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
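# A worked example with hypothetical values: flipping a box with
# [x1, x2] = [10, 30] on a width-100 image gives
# [100 - 30 - 1, 100 - 10 - 1] = [69, 89], which preserves the box
# width (20) and the x2 >= x1 invariant checked above.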
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import PIL.Image
import PIL.ImageEnhance
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
class Distortor(object):
def __init__(self):
self._brightness_prob = cfg.SSD.DISTORT.BRIGHTNESS_PROB
self._brightness_delta = 0.3
self._contrast_prob = cfg.SSD.DISTORT.CONTRAST_PROB
self._contrast_delta = 0.3
self._saturation_prob = cfg.SSD.DISTORT.SATURATION_PROB
self._saturation_delta = 0.3
def distort_image(self, im):
im = PIL.Image.fromarray(im)
if npr.uniform() < self._brightness_prob:
delta_brightness = npr.uniform(-self._brightness_delta, self._brightness_delta) + 1.0
im = PIL.ImageEnhance.Brightness(im)
im = im.enhance(delta_brightness)
if npr.uniform() < self._contrast_prob:
delta_contrast = npr.uniform(-self._contrast_delta, self._contrast_delta) + 1.0
im = PIL.ImageEnhance.Contrast(im)
im = im.enhance(delta_contrast)
if npr.uniform() < self._saturation_prob:
delta_saturation = npr.uniform(-self._contrast_delta, self._contrast_delta) + 1.0
im = PIL.ImageEnhance.Color(im)
im = im.enhance(delta_saturation)
im = np.array(im)
return im
if __name__ == '__main__':
distortor = Distortor()
while True:
im = cv2.imread('cat.jpg')
im = distortor.distort_image(im)
cv2.imshow('Distort', im)
cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy.random as npr
import numpy as np
import math
from lib.core.config import cfg
import lib.utils.logger as logger
class Expander(object):
def __init__(self, **params):
self._expand_prob = cfg.SSD.EXPAND.PROB
self._max_expand_ratio = cfg.SSD.EXPAND.MAX_RATIO
if self._max_expand_ratio < 1.0:
logger.fatal('The max expand ratio must >= 1.0, got {}'.format(self._max_expand_ratio))
def expand_image(self, im, gt_boxes=None):
prob = npr.uniform()
if prob > self._expand_prob : return im, gt_boxes
ratio = npr.uniform(1.0, self._max_expand_ratio)
if ratio == 1: return im, gt_boxes
im_h = im.shape[0]
im_w = im.shape[1]
expand_h = int(im_h * ratio)
expand_w = int(im_w * ratio)
h_off = int(math.floor(npr.uniform(0.0, expand_h - im_h)))
w_off = int(math.floor(npr.uniform(0.0, expand_w - im_w)))
new_im = np.empty((expand_h, expand_w, 3), dtype=np.uint8)
new_im[:] = cfg.PIXEL_MEANS
new_im[h_off : h_off + im_h, w_off : w_off + im_w, :] = im
if gt_boxes is not None:
ex_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
ex_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w + w_off) / expand_w
ex_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h + h_off) / expand_h
ex_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w + w_off) / expand_w
ex_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h + h_off) / expand_h
return new_im, ex_gt_boxes
return new_im, gt_boxes
if __name__ == '__main__':
expander = Expander()
while True:
im = cv2.imread('cat.jpg')
gt_boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
im, gt_boxes = expander.expand_image(im, gt_boxes)
x1 = int(gt_boxes[0][0] * im.shape[1])
y1 = int(gt_boxes[0][1] * im.shape[0])
x2 = int(gt_boxes[0][2] * im.shape[1])
y2 = int(gt_boxes[0][3] * im.shape[0])
cv2.rectangle(im, (x1, y1), (x2, y2), (188,119,64), 2)
cv2.imshow('Expand', im)
cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy.random as npr
from lib.core.config import cfg
class Resizer(object):
def __init__(self):
self._re_height = cfg.SSD.RESIZE.HEIGHT
self._re_width = cfg.SSD.RESIZE.WIDTH
interp_list = {
'LINEAR': cv2.INTER_LINEAR,
'AREA': cv2.INTER_AREA,
'NEAREST': cv2.INTER_NEAREST,
'CUBIC': cv2.INTER_CUBIC,
'LANCZOS4': cv2.INTER_LANCZOS4,
}
interp_mode = cfg.SSD.RESIZE.INTERP_MODE
self._interp_mode = [interp_list[key] for key in interp_mode]
def resize_image(self, im):
rand = npr.randint(0, len(self._interp_mode))
return cv2.resize(
im, (self._re_width, self._re_height),
interpolation=self._interp_mode[rand])
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
from lib.utils.bbox_transform import clip_boxes
from lib.utils.boxes import iou
import lib.utils.logger as logger
class Sampler(object):
def __init__(self, samplers):
if not isinstance(samplers, list): samplers = [samplers]
self._samplers = []
for sampler in samplers:
if len(sampler) != 8:
logger.fatal('The sample params should be a tuple of length 8.')
sample_param = {
'min_scale': sampler[0],
'max_scale': sampler[1],
'min_aspect_ratio': sampler[2],
'max_aspect_ratio': sampler[3],
'min_jaccard_overlap': sampler[4],
'max_jaccard_overlap': sampler[5],
'max_trials': sampler[6],
'max_sample': sampler[7]}
self._samplers.append(sample_param)
def _compute_overlaps(self, rand_box, gt_boxes):
return iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])
def _generate_sample(self, sample_param):
min_scale = sample_param.get('min_scale', 1.0)
max_scale = sample_param.get('max_scale', 1.0)
scale = npr.uniform(min_scale, max_scale)
min_aspect_ratio = sample_param.get('min_aspect_ratio', 1.0)
max_aspect_ratio = sample_param.get('max_aspect_ratio', 1.0)
min_aspect_ratio = max(min_aspect_ratio, scale**2)
max_aspect_ratio = min(max_aspect_ratio, 1.0 / (scale**2))
aspect_ratio = npr.uniform(min_aspect_ratio, max_aspect_ratio)
bbox_w = scale * (aspect_ratio ** 0.5)
bbox_h = scale / (aspect_ratio ** 0.5)
w_off = npr.uniform(0.0, float(1 - bbox_w))
h_off = npr.uniform(0.0, float(1 - bbox_h))
return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])
def _check_satisfy(self, sample_box, gt_boxes, constraint):
min_jaccard_overlap = constraint.get('min_jaccard_overlap', None)
max_jaccard_overlap = constraint.get('max_jaccard_overlap', None)
if min_jaccard_overlap == None and \
max_jaccard_overlap == None:
return True
max_overlap = self._compute_overlaps(sample_box, gt_boxes).max()
if min_jaccard_overlap is not None:
if max_overlap < min_jaccard_overlap: return False
if max_jaccard_overlap is not None:
if max_overlap > max_jaccard_overlap: return False
return True
def _generate_batch_samples(self, gt_boxes):
sample_boxes = []
for sampler in self._samplers:
found = 0
for i in range(sampler['max_trials']):
if found >= sampler['max_sample']: break
sample_box = self._generate_sample(sampler)
if sampler['min_jaccard_overlap'] != 0.0 or \
sampler['max_jaccard_overlap'] != 1.0:
ok = self._check_satisfy(sample_box, gt_boxes, sampler)
if not ok: continue
found += 1
sample_boxes.append(sample_box)
return sample_boxes
def _rand_crop(self, im, rand_box, gt_boxes=None):
im_h = im.shape[0]
im_w = im.shape[1]
w_off = int(rand_box[0] * im_w)
h_off = int(rand_box[1] * im_h)
crop_w = int((rand_box[2] - rand_box[0]) * im_w)
crop_h = int((rand_box[3] - rand_box[1]) * im_h)
new_im = im[h_off: h_off + crop_h, w_off: w_off + crop_w, :]
if gt_boxes is not None:
ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
# Keep the ground-truth box whose center is in the sample box
# Implement ``EmitConstraint.CENTER`` in the original SSD
keep_inds = np.where((ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2])
& (ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
gt_boxes = gt_boxes[keep_inds]
new_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
new_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w - w_off)
new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off)
new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off)
new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off)
new_gt_boxes = clip_boxes(new_gt_boxes, (crop_h, crop_w))
new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w
new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h
new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w
new_gt_boxes[:, 3] = new_gt_boxes[:, 3] / crop_h
return new_im, new_gt_boxes
return new_im, gt_boxes
def sample_image(self, im, gt_boxes):
sample_boxes = self._generate_batch_samples(gt_boxes)
if len(sample_boxes) > 0:
# Apply sampling if found at least one valid sample box
# Then randomly pick one
sample_idx = npr.randint(0, len(sample_boxes))
rand_box = sample_boxes[sample_idx]
im, gt_boxes = self._rand_crop(im, rand_box, gt_boxes)
return im, gt_boxes
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
import numpy.random as npr
npr.seed(3)
import sys
sys.path.append('../../')
from resize import Resizer
from expand import Expander
from distort import Distortor
from sample import Sampler
from lib.core.config import cfg
if __name__ == '__main__':
distorter = Distortor()
expander = Expander()
sampler = Sampler(cfg.SSD.SAMPLERS)
resizer = Resizer()
while True:
im = cv2.imread('cat.jpg')
gt_boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
im = distorter.distort_image(im)
im, gt_boxes = expander.expand_image(im, gt_boxes)
im, gt_boxes = sampler.sample_image(im, gt_boxes)
if len(gt_boxes) < 1: continue
im = resizer.resize_image(im)
for gt_box in gt_boxes:
x1 = int(gt_box[0] * im.shape[1])
y1 = int(gt_box[1] * im.shape[0])
x2 = int(gt_box[2] * im.shape[1])
y2 = int(gt_box[3] * im.shape[0])
cv2.rectangle(im, (x1, y1), (x2, y2), (188, 119, 64), 2)
print(x1, y1, x2, y2)
cv2.imshow('Sample', im)
cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import math
import cv2
import PIL.Image
import PIL.ImageEnhance
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import iou
class Compose(object):
"""Compose the several transforms together."""
def __init__(self, *transforms):
self.transforms = transforms
def __call__(self, img, boxes):
for transform in self.transforms:
img, boxes = transform.apply(img, boxes)
return img, boxes
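# Assumed usage (mirroring DataTransformer._image_aug above): transforms
# are applied in order, each consuming and returning an (img, boxes) pair:
#   aug = Compose(Distort(), Expand(), Sample(), Resize())
#   img, boxes = aug(img, boxes)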
class Distort(object):
def __init__(self):
self._brightness_prob = cfg.SSD.DISTORT.BRIGHTNESS_PROB
self._contrast_prob = cfg.SSD.DISTORT.CONTRAST_PROB
self._saturation_prob = cfg.SSD.DISTORT.SATURATION_PROB
def apply(self, img, boxes=None):
img = PIL.Image.fromarray(img)
if npr.uniform() < self._brightness_prob:
delta = npr.uniform(-0.3, 0.3) + 1.
img = PIL.ImageEnhance.Brightness(img)
img = img.enhance(delta)
if npr.uniform() < self._contrast_prob:
delta = npr.uniform(-0.3, 0.3) + 1.
img = PIL.ImageEnhance.Contrast(img)
img = img.enhance(delta)
if npr.uniform() < self._saturation_prob:
delta = npr.uniform(-0.3, 0.3) + 1.
img = PIL.ImageEnhance.Color(img)
img = img.enhance(delta)
return np.array(img), boxes
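# Note on the PIL semantics relied on above: enhance(1.0) returns the
# image unchanged, so the factors drawn in [0.7, 1.3] perturb
# brightness/contrast/saturation by up to +/-30% around the identity.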
class Expand(object):
def __init__(self):
self._expand_prob = cfg.SSD.EXPAND.PROB
self._max_ratio = cfg.SSD.EXPAND.MAX_RATIO
if self._max_ratio < 1.0:
logger.fatal(
'The max expand ratio must be >= 1, got {}'
.format(self._max_ratio)
)
def apply(self, img, boxes=None):
prob = npr.uniform()
if prob > self._expand_prob:
return img, boxes
ratio = npr.uniform(1., self._max_ratio)
if ratio == 1:
return img, boxes
im_h, im_w = img.shape[:2]
expand_h, expand_w = int(im_h * ratio), int(im_w * ratio)
h_off = int(math.floor(npr.uniform(0., expand_h - im_h)))
w_off = int(math.floor(npr.uniform(0., expand_w - im_w)))
new_img = np.empty((expand_h, expand_w, 3), dtype=np.uint8)
new_img[:] = cfg.PIXEL_MEANS
new_img[h_off:h_off + im_h, w_off:w_off + im_w, :] = img
if boxes is not None:
new_boxes = boxes.astype(boxes.dtype, copy=True)
new_boxes[:, 0] = (boxes[:, 0] * im_w + w_off) / expand_w
new_boxes[:, 1] = (boxes[:, 1] * im_h + h_off) / expand_h
new_boxes[:, 2] = (boxes[:, 2] * im_w + w_off) / expand_w
new_boxes[:, 3] = (boxes[:, 3] * im_h + h_off) / expand_h
boxes = new_boxes
return new_img, boxes
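# A worked example with hypothetical numbers: expanding a 100x100 image
# by ratio 2 with (h_off, w_off) = (50, 50) maps a normalized x1 = 0.2
# to (0.2 * 100 + 50) / 200 = 0.35, i.e. the box keeps its absolute
# pixel position inside the padded canvas.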
class Resize(object):
def __init__(self):
self._target_size = (
cfg.SSD.RESIZE.WIDTH,
cfg.SSD.RESIZE.HEIGHT,
)
interp_list = {
'LINEAR': cv2.INTER_LINEAR,
'AREA': cv2.INTER_AREA,
'NEAREST': cv2.INTER_NEAREST,
'CUBIC': cv2.INTER_CUBIC,
'LANCZOS4': cv2.INTER_LANCZOS4,
}
interp_mode = cfg.SSD.RESIZE.INTERP_MODE
self._interp_mode = [interp_list[key] for key in interp_mode]
def apply(self, img, boxes):
rand = npr.randint(len(self._interp_mode))
return cv2.resize(
img, self._target_size,
interpolation=self._interp_mode[rand],
), boxes
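# cv2.resize expects its target size as (width, height), which is why
# _target_size above is built as (RESIZE.WIDTH, RESIZE.HEIGHT) even
# though numpy image shapes are (height, width, channels).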
class Sample(object):
def __init__(self):
samplers = cfg.SSD.SAMPLERS
if not isinstance(samplers, collections.Iterable):
samplers = [samplers]
self._samplers = []
for sampler in samplers:
if len(sampler) != 8:
logger.fatal('The sample params should be a tuple of length 8.')
sample_param = {
'min_scale': sampler[0],
'max_scale': sampler[1],
'min_aspect_ratio': sampler[2],
'max_aspect_ratio': sampler[3],
'min_overlap': sampler[4],
'max_overlap': sampler[5],
'max_trials': sampler[6],
'max_sample': sampler[7],
}
self._samplers.append(sample_param)
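# An example entry of cfg.SSD.SAMPLERS under this format (hypothetical
# values):
#   (0.3, 1.0,  # min / max scale of the sampled patch
#    0.5, 2.0,  # min / max aspect ratio
#    0.1, 1.0,  # min / max IoU against the gt boxes
#    10, 1)     # max trials / max samples to keep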
@classmethod
def _compute_overlaps(cls, rand_box, gt_boxes):
return iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])
@classmethod
def _generate_sample(cls, sample_param):
min_scale = sample_param.get('min_scale', 1.)
max_scale = sample_param.get('max_scale', 1.)
scale = npr.uniform(min_scale, max_scale)
min_aspect_ratio = sample_param.get('min_aspect_ratio', 1.)
max_aspect_ratio = sample_param.get('max_aspect_ratio', 1.)
min_aspect_ratio = max(min_aspect_ratio, scale**2)
max_aspect_ratio = min(max_aspect_ratio, 1. / (scale**2))
aspect_ratio = npr.uniform(min_aspect_ratio, max_aspect_ratio)
bbox_w = scale * (aspect_ratio ** 0.5)
bbox_h = scale / (aspect_ratio ** 0.5)
w_off = npr.uniform(0., 1. - bbox_w)
h_off = npr.uniform(0., 1. - bbox_h)
return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])
def _check_satisfy(self, sample_box, gt_boxes, constraint):
min_overlap = constraint.get('min_overlap', None)
max_overlap = constraint.get('max_overlap', None)
if min_overlap is None and \
max_overlap is None:
return True
# Use a distinct name to avoid shadowing the constraint value read above
best_overlap = self._compute_overlaps(sample_box, gt_boxes).max()
if min_overlap is not None:
    if best_overlap < min_overlap:
        return False
if max_overlap is not None:
    if best_overlap > max_overlap:
        return False
return True
def _generate_batch_samples(self, gt_boxes):
sample_boxes = []
for sampler in self._samplers:
found = 0
for i in range(sampler['max_trials']):
if found >= sampler['max_sample']:
break
sample_box = self._generate_sample(sampler)
if sampler['min_overlap'] != 0. or \
sampler['max_overlap'] != 1.:
ok = self._check_satisfy(sample_box, gt_boxes, sampler)
if not ok:
continue
found += 1
sample_boxes.append(sample_box)
return sample_boxes
@classmethod
def _rand_crop(cls, im, rand_box, gt_boxes=None):
im_h, im_w = im.shape[:2]
w_off = int(rand_box[0] * im_w)
h_off = int(rand_box[1] * im_h)
crop_w = int((rand_box[2] - rand_box[0]) * im_w)
crop_h = int((rand_box[3] - rand_box[1]) * im_h)
new_im = im[h_off:h_off + crop_h, w_off:w_off + crop_w, :]
if gt_boxes is not None:
ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
# Keep the ground-truth box whose center is in the sample box
# Implement ``EmitConstraint.CENTER`` in the original SSD
keep_inds = np.where((ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2]) &
(ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
gt_boxes = gt_boxes[keep_inds]
new_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
new_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w - w_off)
new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off)
new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off)
new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off)
new_gt_boxes = clip_tiled_boxes(new_gt_boxes, (crop_h, crop_w))
new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w
new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h
new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w
new_gt_boxes[:, 3] = new_gt_boxes[:, 3] / crop_h
return new_im, new_gt_boxes
return new_im, gt_boxes
def apply(self, img, boxes):
sample_boxes = self._generate_batch_samples(boxes)
if len(sample_boxes) > 0:
# Apply sampling if found at least one valid sample box
# Then randomly pick one
sample_idx = npr.randint(len(sample_boxes))
rand_box = sample_boxes[sample_idx]
img, boxes = self._rand_crop(img, rand_box, boxes)
return img, boxes