Commit f8359d17 by Ting PAN

Adapt to SeetaRecord

1 parent ca255ea0
Showing with 5417 additions and 6186 deletions
......@@ -47,4 +47,4 @@ __pycache__
.idea
# OSX dir files
.DS_Store
\ No newline at end of file
.DS_Store
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.2.0 (20190929)
Dragon Minimum Required (Version 0.3.0.dev20190929)
Changes:
Preview Features:
- Use SeetaRecord instead of LMDB.
- Flatten the implementation of layers.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.1.2 (20190723)
Dragon Minimum Required (Version 0.3.0.0)
......
#!/bin/sh
# delete cache
rm -r build install *.c *.cpp
# compile proto files
protoc -I ../lib/proto --python_out=../lib/proto ../lib/proto/anno.proto
# compile cython modules
python setup.py build_ext --inplace
# compile cuda modules
cd build
cmake .. && make install && cd ..
cd build && cmake .. && make install && cd ..
# setup
cp -r install/lib ../
......@@ -32,15 +32,15 @@ FRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-101.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k_lmdb'
WEIGHTS: '/model/R-101.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 2
USE_DIFF: False # Do not use crowd objects
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
TEST:
DATABASE: '/data/coco_2014_minival_lmdb'
DATABASE: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
......
......@@ -32,15 +32,15 @@ FRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-101.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k_lmdb'
WEIGHTS: '/model/R-101.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 2
USE_DIFF: False # Do not use crowd objects
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
TEST:
DATABASE: '/data/coco_2014_minival_lmdb'
DATABASE: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
......
......@@ -23,14 +23,14 @@ FRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 2
BATCH_SIZE: 128
SCALES: [600]
MAX_SIZE: 1000
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [600]
......
......@@ -28,15 +28,15 @@ FRCNN:
ROI_XFORM_RESOLUTION: 7
MLP_HEAD_DIM: 4096
TRAIN:
WEIGHTS: '/data/models/imagenet/VGG16.RCNN.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/VGG16.RCNN.pth'
DATABASE: '/data/voc_0712_trainval'
RPN_MIN_SIZE: 16
IMS_PER_BATCH: 2
BATCH_SIZE: 128
SCALES: [600]
MAX_SIZE: 1000
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_MIN_SIZE: 16
RPN_POST_NMS_TOP_N: 300
......
......@@ -32,13 +32,13 @@ FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k_lmdb'
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
TEST:
DATABASE: '/data/coco_2014_minival_lmdb'
DATABASE: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
......
......@@ -36,8 +36,8 @@ DROPBLOCK:
DROP_ON: True
DECREMENT: 0.000005 # * 20000 = 0.1
TRAIN:
WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k_lmdb'
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
......@@ -45,7 +45,7 @@ TRAIN:
COLOR_JITTERING: True
SCALE_RANGE: [0.75, 1.33]
TEST:
DATABASE: '/data/coco_2014_minival_lmdb'
DATABASE: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
......
......@@ -23,8 +23,8 @@ FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/AirNet.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/AirNet.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
......@@ -32,7 +32,7 @@ TRAIN:
SCALE_JITTERING: True
COLOR_JITTERING: True
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
......
......@@ -24,8 +24,8 @@ FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-18.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/R-18.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
......@@ -33,7 +33,7 @@ TRAIN:
SCALE_JITTERING: True
COLOR_JITTERING: True
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
......
......@@ -24,8 +24,8 @@ FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-34.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/R-34.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
......@@ -33,7 +33,7 @@ TRAIN:
SCALE_JITTERING: True
COLOR_JITTERING: True
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
......
......@@ -29,11 +29,11 @@ SSD:
STRIDES: [8, 16, 32]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
WEIGHTS: '/data/models/imagenet/AirNet.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/AirNet.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
NMS_TOP_K: 400
......
......@@ -32,11 +32,11 @@ SSD:
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5, 3, 0.33], [1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
WEIGHTS: '/data/models/imagenet/VGG16.SSD.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/VGG16.SSD.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
NMS_TOP_K: 400
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os.path as osp
sys.path.insert(0, '../../../')
from database.frcnn.utils.make_from_xml import make_db
if __name__ == '__main__':
VOC_ROOT_DIR = '/home/workspace/datasets/VOC'
# train database: voc_2007_trainval + voc_2012_trainval
make_db(database_file=osp.join(VOC_ROOT_DIR, 'cache/voc_0712_trainval_lmdb'),
images_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/JPEGImages'),
osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/JPEGImages')],
annotations_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/Annotations'),
osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/Annotations')],
imagesets_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
splits=['trainval', 'trainval'])
# test database: voc_2007_test
make_db(database_file=osp.join(VOC_ROOT_DIR, 'cache/voc_2007_test_lmdb'),
images_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/JPEGImages'),
annotations_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/Annotations'),
imagesets_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits=['test'])
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import time
import cv2
from dragon.tools.db import LMDB
sys.path.insert(0, '../../..')
from lib.proto import anno_pb2 as pb
ZFILL = 8
ENCODE_QUALITY = 95
def set_zfill(value):
global ZFILL
ZFILL = value
def set_quality(value):
global ENCODE_QUALITY
ENCODE_QUALITY = value
def make_datum(image_id, image_file, objects):
anno_datum = pb.AnnotatedDatum()
datum = pb.Datum()
im = cv2.imread(image_file)
datum.height, datum.width, datum.channels = im.shape
datum.encoded = ENCODE_QUALITY != 100
if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
    datum.data = im.tobytes()  # tostring() is deprecated in modern NumPy
anno_datum.datum.CopyFrom(datum)
anno_datum.filename = image_id
for ix, obj in enumerate(objects):
anno = pb.Annotation()
anno.x1, anno.y1, anno.x2, anno.y2 = obj['bbox']
anno.name = obj['name']
anno.difficult = obj['difficult']
anno_datum.annotation.add().CopyFrom(anno)
return anno_datum
def make_db(database_file, images_path, gt_recs, ext='.png'):
    if os.path.isdir(database_file):
        raise ValueError('The database path already exists.')
else:
root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir):
os.makedirs(root_dir)
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=10000)
db.open(database_file, mode='w')
count = 0
total_line = len(gt_recs)
start_time = time.time()
zfill_flag = '{0:0%d}' % (ZFILL)
for image_id, objects in gt_recs.items():
count += 1
if count % 10000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
image_file = os.path.join(images_path, image_id + ext)
datum = make_datum(image_id, image_file, objects)
db.put(zfill_flag.format(count - 1), datum.SerializeToString())
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
db.commit()
db.close()
end_time = time.time()
print('{0} images have been stored in the database.'.format(total_line))
print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
print('The size of database is {0} MB.'.format(
float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
\ No newline at end of file
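# A minimal usage sketch of make_db above (hypothetical paths and records;
# each object dict carries the 'bbox', 'name' and 'difficult' fields read
# by make_datum):
#   gt_recs = {
#       'img_000001': [
#           {'bbox': (48., 240., 195., 371.), 'name': 'dog', 'difficult': False},
#       ],
#   }
#   make_db(database_file='/data/my_dataset_lmdb',
#           images_path='/data/my_dataset/images',
#           gt_recs=gt_recs)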
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import time
import cv2
import xml.etree.ElementTree as ET
from dragon.tools.db import LMDB
sys.path.insert(0, '../../..')
from lib.proto import anno_pb2 as pb
ZFILL = 8
ENCODE_QUALITY = 95
class_name_set = set()  # collects the distinct class names seen in make_datum below
def set_zfill(value):
global ZFILL
ZFILL = value
def set_quality(value):
global ENCODE_QUALITY
ENCODE_QUALITY = value
def make_datum(image_file, xml_file):
tree = ET.parse(xml_file)
filename = os.path.split(xml_file)[-1]
objs = tree.findall('object')
anno_datum = pb.AnnotatedDatum()
datum = pb.Datum()
im = cv2.imread(image_file)
    if im is None or im.shape[0] == 0 or im.shape[1] == 0:
        print('Invalid image, ignored:', image_file)
        return None
datum.height, datum.width, datum.channels = im.shape
datum.encoded = ENCODE_QUALITY != 100
if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
        if not result or im is None:
            print('Failed to encode image, ignored:', image_file)
            return None
    datum.data = im.tobytes()  # tostring() is deprecated in modern NumPy
anno_datum.datum.CopyFrom(datum)
anno_datum.filename = filename.split('.')[0]
if len(objs) == 0:
return None
for ix, obj in enumerate(objs):
anno = pb.Annotation()
bbox = obj.find('bndbox')
x1 = float(bbox.find('xmin').text)
y1 = float(bbox.find('ymin').text)
x2 = float(bbox.find('xmax').text)
y2 = float(bbox.find('ymax').text)
cls = obj.find('name').text.strip()
anno.x1, anno.y1, anno.x2, anno.y2 = (x1, y1, x2, y2)
anno.name = cls
class_name_set.add(cls)
anno.difficult = False
if obj.find('difficult') is not None:
anno.difficult = int(obj.find('difficult').text) == 1
anno_datum.annotation.add().CopyFrom(anno)
return anno_datum
def make_db(
database_file,
images_path,
annotations_path,
imagesets_path,
splits,
):
    if os.path.isdir(database_file):
        print('Warning: The database path already exists.')
else:
root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir):
os.makedirs(root_dir)
if not isinstance(images_path, list):
images_path = [images_path]
if not isinstance(annotations_path, list):
annotations_path = [annotations_path]
if not isinstance(imagesets_path, list):
imagesets_path = [imagesets_path]
assert len(splits) == len(imagesets_path)
assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path)
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=1000)
db.open(database_file, mode='w')
count = 0
total_line = 0
start_time = time.time()
zfill_flag = '{0:0%d}' % ZFILL
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
datum = make_datum(image_file, xml_file)
if datum is not None:
count += 1
db.put(zfill_flag.format(count - 1), datum.SerializeToString())
if count % 1000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
db.commit()
db.close()
end_time = time.time()
print('{0} images have been stored in the database.'.format(total_line))
print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
print('The size of database is {0} MB.'.format(
float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/facebookresearch/Detectron/blob/master/lib/core/config.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path as osp
import numpy as np
from lib.utils.attrdict import AttrDict as edict
__C = edict()
cfg = __C
###########################################
# #
# Training Options #
# #
###########################################
__C.TRAIN = edict()
# Initialize network with weights from this file
__C.TRAIN.WEIGHTS = ''
# Database to train
__C.TRAIN.DATABASE = ''
# Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)
# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TRAIN.MAX_SIZE = 1000
# Images to use per mini-batch
__C.TRAIN.IMS_PER_BATCH = 1
# Minibatch size (number of regions of interest [ROIs])
__C.TRAIN.BATCH_SIZE = 128
# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25
# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5
# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0
# Use shuffle after each epoch
__C.TRAIN.USE_SHUFFLE = True
# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True
# Use the difficult (e.g. occluded) objects
__C.TRAIN.USE_DIFF = True
# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5
# If True, randomly scale the image by scale range
__C.TRAIN.SCALE_JITTERING = False
__C.TRAIN.SCALE_RANGE = [0.75, 1.0]
# If True, randomly distort the image by brightness, contrast, and saturation
__C.TRAIN.COLOR_JITTERING = False
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor satisfies both the positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TRAIN.RPN_MIN_SIZE = 0
# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
__C.TRAIN.RPN_STRADDLE_THRESH = 0
###########################################
# #
# Testing Options #
# #
###########################################
__C.TEST = edict()
# Database to test
__C.TEST.DATABASE = ''
# Original json ground-truth file to use
# Records in the Database file will be used instead
__C.TEST.JSON_FILE = ''
# Scales to use during testing (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,)
# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TEST.MAX_SIZE = 1000
# Images to use per mini-batch
__C.TEST.IMS_PER_BATCH = 1
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3
# Use Soft-NMS instead of standard NMS?
# For the soft NMS overlap threshold, we simply use TEST.NMS
__C.TEST.USE_SOFT_NMS = False
__C.TEST.SOFT_NMS_METHOD = 'linear'
__C.TEST.SOFT_NMS_SIGMA = 0.5
# The number of top scoring prior boxes to keep before NMS
__C.TEST.NMS_TOP_K = 400
# The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05
# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 0
# Save detection results files if True
# If false, results files are cleaned up (they can be large) after local
# evaluation
__C.TEST.COMPETITION_MODE = True
# The optional test protocol for custom datasets
# Ignored by the VOC and COCO datasets
# Available protocols: 'voc2007', 'voc2010', 'coco'
__C.TEST.PROTOCOL = 'voc2007'
# Maximum number of detections to return per image (100 is based on the limit
# established for the COCO dataset)
__C.TEST.DETECTIONS_PER_IM = 100
###########################################
# #
# Model Options #
# #
###########################################
__C.MODEL = edict()
# The type of the model
# ('faster_rcnn',
# 'mask_rcnn',
# 'ssd',
# 'rssd',
# 'retinanet',
# )
__C.MODEL.TYPE = ''
# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE = 'FLOAT32'
# The backbone
__C.MODEL.BACKBONE = ''
# The number of classes in the dataset
__C.MODEL.NUM_CLASSES = -1
# Keep it for TaaS DataSet
__C.MODEL.CLASSES = ['__background__']
# Add StopGrad at a specified stage so the bottom layers are frozen
__C.MODEL.FREEZE_AT = 2
# Whether to use focal loss for one-stage detectors?
# Enabled if model type in ('ssd',)
# RetinaNet is forced to use focal loss
__C.MODEL.USE_FOCAL_LOSS = False
__C.MODEL.FOCAL_LOSS_ALPHA = 0.25
__C.MODEL.FOCAL_LOSS_GAMMA = 2.0
# Stride of the coarsest Feature level
# This is needed so the input can be padded properly
__C.MODEL.COARSEST_STRIDE = -1
###########################################
# #
# RPN Options #
# #
###########################################
__C.RPN = edict()
# Strides for multiple rpn heads
__C.RPN.STRIDES = [4, 8, 16, 32, 64]
# Scales for multiple anchors
__C.RPN.SCALES = [8, 8, 8, 8, 8]
# RPN anchor aspect ratios
__C.RPN.ASPECT_RATIOS = [0.5, 1, 2]
###########################################
# #
# Retina-Net Options #
# #
###########################################
__C.RETINANET = edict()
# Anchor aspect ratios to use
__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)
# Anchor scales per octave
__C.RETINANET.SCALES_PER_OCTAVE = 3
# At each FPN level, we generate anchors based on their scale, aspect_ratio,
# stride of the level, and we multiply the resulting anchor by ANCHOR_SCALE
__C.RETINANET.ANCHOR_SCALE = 4
# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
__C.RETINANET.NUM_CONVS = 4
# During inference, #locs to select based on cls score before NMS is performed
__C.RETINANET.PRE_NMS_TOP_N = 5000
# IoU overlap ratio for labeling an anchor as positive
# Anchors with >= iou overlap are labeled positive
__C.RETINANET.POSITIVE_OVERLAP = 0.5
# IoU overlap ratio for labeling an anchor as negative
# Anchors with < iou overlap are labeled negative
__C.RETINANET.NEGATIVE_OVERLAP = 0.4
###########################################
# #
# FPN Options #
# #
###########################################
__C.FPN = edict()
# Channel dimension of the FPN feature levels
__C.FPN.DIM = 256
# Coarsest level of the FPN pyramid
__C.FPN.RPN_MAX_LEVEL = 6
# Finest level of the FPN pyramid
__C.FPN.RPN_MIN_LEVEL = 2
# Hyper-Parameters for the RoI-to-FPN level mapping heuristic
__C.FPN.ROI_CANONICAL_SCALE = 224
__C.FPN.ROI_CANONICAL_LEVEL = 4
# Coarsest level of the FPN pyramid
__C.FPN.ROI_MAX_LEVEL = 5
# Finest level of the FPN pyramid
__C.FPN.ROI_MIN_LEVEL = 2
###########################################
# #
# Fast R-CNN Options #
# #
###########################################
__C.FRCNN = edict()
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.FRCNN.ROI_XFORM_METHOD = 'RoIPool'
# Hidden layer dimension when using an MLP for the RoI box head
__C.FRCNN.MLP_HEAD_DIM = 1024
# RoI transform output resolution
# Note: some models may have constraints on what they can use, e.g. they use
# pretrained FC layers like in VGG16, and will ignore this option
__C.FRCNN.ROI_XFORM_RESOLUTION = 7
###########################################
# #
# Mask R-CNN Options #
# #
###########################################
__C.MRCNN = edict()
# Resolution of mask predictions
__C.MRCNN.RESOLUTION = 28
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign'
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.MRCNN.ROI_XFORM_RESOLUTION = 14
###########################################
# #
# SSD Options #
# #
###########################################
__C.SSD = edict()
# Whether to enable FPN enhancement?
__C.SSD.FPN_ON = False
__C.SSD.MULTIBOX = edict()
# MultiBox configs
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
__C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []
__C.SSD.OHEM = edict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
# The ratio used in hard example mining
__C.SSD.OHEM.NEG_POS_RATIO = 3.0
# Distort the image?
__C.SSD.DISTORT = edict()
__C.SSD.DISTORT.BRIGHTNESS_PROB = 0.5
__C.SSD.DISTORT.CONTRAST_PROB = 0.5
__C.SSD.DISTORT.SATURATION_PROB = 0.5
# Expand the image?
__C.SSD.EXPAND = edict()
__C.SSD.EXPAND.PROB = 0.5
__C.SSD.EXPAND.MAX_RATIO = 4.0
# Resize the image?
__C.SSD.RESIZE = edict()
__C.SSD.RESIZE.HEIGHT = 300
__C.SSD.RESIZE.WIDTH = 300
__C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
# Samplers
# Format as (min_scale, max_scale,
# min_aspect_ratio, max_aspect_ratio,
# min_jaccard_overlap, max_jaccard_overlap,
# max_trials, max_sample)
__C.SSD.SAMPLERS = [
(1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1, 1), # Entire image
(0.3, 1.0, 0.5, 2.0, 0.1, 1.0, 10, 1), # IoU >= 0.1
(0.3, 1.0, 0.5, 2.0, 0.3, 1.0, 10, 1), # IoU >= 0.3
(0.3, 1.0, 0.5, 2.0, 0.5, 1.0, 5, 1), # IoU >= 0.5
(0.3, 1.0, 0.5, 2.0, 0.7, 1.0, 5, 1), # IoU >= 0.7
(0.3, 1.0, 0.5, 2.0, 0.9, 1.0, 5, 1), # IoU >= 0.9
(0.3, 1.0, 0.5, 2.0, 0.0, 1.0, 1, 1), # Any patches
]
###########################################
# #
# ResNet Options #
# #
###########################################
__C.RESNET = edict()
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
__C.RESNET.NUM_GROUPS = 1
# Baseline width of each group
__C.RESNET.GROUP_WIDTH = 64
###########################################
# #
# DropBlock Options #
# #
###########################################
__C.DROPBLOCK = edict()
# Whether to use DropBlock for more regularization
__C.DROPBLOCK.DROP_ON = False
# Decrement for scheduling keep prob after each iteration
__C.DROPBLOCK.DECREMENT = 1e-6
###########################################
# #
# Solver Options #
# #
###########################################
__C.SOLVER = edict()
# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001
# Optional scaling factor for total loss
# This option is helpful to scale the magnitude
# of gradients during FP16 training
__C.SOLVER.LOSS_SCALING = 1.
# Schedule type (see functions in utils.lr_policy for options)
# E.g., 'step', 'steps_with_decay', ...
__C.SOLVER.LR_POLICY = 'steps_with_decay'
# Hyperparameter used by the specified policy
# For 'step', the current LR is multiplied by SOLVER.GAMMA at each step
__C.SOLVER.GAMMA = 0.1
# Uniform step size for 'steps' policy
__C.SOLVER.STEP_SIZE = 30000
__C.SOLVER.STEPS = []
# Maximum number of SGD iterations
__C.SOLVER.MAX_ITERS = 40000
# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9
# L2 regularization hyper parameters
__C.SOLVER.WEIGHT_DECAY = 0.0005
# L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = -1.0
# Warm up to SOLVER.BASE_LR over this number of SGD iterations
__C.SOLVER.WARM_UP_ITERS = 500
# Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR
__C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0
# The steps for accumulating gradients
__C.SOLVER.ITER_SIZE = 1
# The interval to display logs
__C.SOLVER.DISPLAY = 20
# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_ITERS = 5000
# Prefix to yield the path: <prefix>_iters_XYZ.caffemodel
__C.SOLVER.SNAPSHOT_PREFIX = ''
###########################################
# #
# Misc Options #
# #
###########################################
# Number of GPUs to use (applies to both training and testing)
__C.NUM_GPUS = 1
# Use NCCL for all-reduce, otherwise use CUDA-aware MPI
__C.USE_NCCL = True
# Hosts for Inter-Machine communication
__C.HOSTS = []
# Pixel mean values (BGR order)
__C.PIXEL_MEANS = [102., 115., 122.]
# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
__C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
# Default weights on (dx, dy, dw, dh, da) for normalizing rbox regression targets
__C.RBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0, 10.0)
# Prior prob for the positives at the beginning of training.
# This is used to set the bias init for the logits layer
__C.PRIOR_PROB = 0.01
# For reproducibility
__C.RNG_SEED = 3
# Root directory of project
__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
# Data directory
__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))
# Place outputs under an experiments directory
__C.EXP_DIR = ''
# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True
# Default GPU device id
__C.GPU_ID = 0
# Dump detection visualizations
__C.VIS = False
__C.VIS_ON_FILE = False
# Score threshold for visualization
__C.VIS_TH = 0.7
# Write summaries by tensor board
__C.ENABLE_TENSOR_BOARD = False
def _merge_a_into_b(a, b):
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
if not isinstance(a, dict):
return
for k, v in a.items():
# a must specify keys that are in b
if k not in b:
raise KeyError('{} is not a valid config key'.format(k))
# the types must match, too
v = _check_and_coerce_cfg_value_type(v, b[k], k)
# recursively merge dicts
if type(v) is edict:
try:
_merge_a_into_b(a[k], b[k])
except:
print('Error under config key: {}'.format(k))
raise
else:
b[k] = v
def cfg_from_file(filename):
"""Load a config file and merge it into the default options."""
import yaml
with open(filename, 'r') as f:
        yaml_cfg = edict(yaml.safe_load(f))
global __C
_merge_a_into_b(yaml_cfg, __C)
def cfg_from_list(cfg_list):
"""Set config keys via list (e.g., from command line)."""
from ast import literal_eval
assert len(cfg_list) % 2 == 0
for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
key_list = k.split('.')
d = __C
for subkey in key_list[:-1]:
            assert subkey in d  # dict.has_key() was removed in Python 3
d = d[subkey]
subkey = key_list[-1]
assert subkey in d
try:
value = literal_eval(v)
except:
# Handle the case when v is a string literal
value = v
assert type(value) == type(d[subkey]), \
'type {} does not match original type {}'\
.format(type(value), type(d[subkey]))
d[subkey] = value
def _check_and_coerce_cfg_value_type(value_a, value_b, key):
"""Checks that `value_a`, which is intended to replace `value_b` is of the
right type. The type is correct if it matches exactly or is one of a few
cases in which the type can be easily coerced.
"""
# The types must match (with some exceptions)
type_b = type(value_b)
type_a = type(value_a)
if type_a is type_b:
return value_a
if type_b is float and type_a is int:
return float(value_a)
# Exceptions: numpy arrays, strings, tuple<->list
if isinstance(value_b, np.ndarray):
value_a = np.array(value_a, dtype=value_b.dtype)
elif isinstance(value_a, tuple) and isinstance(value_b, list):
value_a = list(value_a)
elif isinstance(value_a, list) and isinstance(value_b, tuple):
value_a = tuple(value_a)
elif isinstance(value_a, dict) and isinstance(value_b, edict):
value_a = edict(value_a)
else:
raise ValueError(
'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
'key: {}'.format(type_b, type_a, value_b, value_a, key)
)
return value_a
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/facebookresearch/Detectron/blob/master/lib/core/config.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path as osp
import numpy as np
from lib.utils.attrdict import AttrDict as edict
__C = edict()
cfg = __C
###########################################
# #
# Training Options #
# #
###########################################
__C.TRAIN = edict()
# Initialize network with weights from this file
__C.TRAIN.WEIGHTS = ''
# Database to train
__C.TRAIN.DATABASE = ''
# Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)
# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TRAIN.MAX_SIZE = 1000
# Images to use per mini-batch
__C.TRAIN.IMS_PER_BATCH = 1
# Minibatch size (number of regions of interest [ROIs])
__C.TRAIN.BATCH_SIZE = 128
# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25
# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5
# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0
# Use shuffle after each epoch
__C.TRAIN.USE_SHUFFLE = True
# The number of chunks to shuffle
__C.TRAIN.NUM_SHUFFLE_CHUNKS = 0
# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True
# Use the difficult (e.g. occluded) objects
__C.TRAIN.USE_DIFF = True
# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5
# If True, randomly scale the image by scale range
__C.TRAIN.SCALE_JITTERING = False
__C.TRAIN.SCALE_RANGE = [0.75, 1.0]
# If True, randomly distort the image by brightness, contrast, and saturation
__C.TRAIN.COLOR_JITTERING = False
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor satisfies both the positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TRAIN.RPN_MIN_SIZE = 0
# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
__C.TRAIN.RPN_STRADDLE_THRESH = 0
###########################################
# #
# Testing Options #
# #
###########################################
__C.TEST = edict()
# Database to test
__C.TEST.DATABASE = ''
# Original json ground-truth file to use
# Records in the Database file will be used instead
__C.TEST.JSON_FILE = ''
# Scales to use during testing (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,)
# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TEST.MAX_SIZE = 1000
# Images to use per mini-batch
__C.TEST.IMS_PER_BATCH = 1
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3
# Use Soft-NMS instead of standard NMS?
# For the soft NMS overlap threshold, we simply use TEST.NMS
__C.TEST.USE_SOFT_NMS = False
__C.TEST.SOFT_NMS_METHOD = 'linear'
__C.TEST.SOFT_NMS_SIGMA = 0.5
# The number of top scoring prior boxes to keep before NMS
__C.TEST.NMS_TOP_K = 400
# The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05
# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 0
# Save detection results files if True
# If false, results files are cleaned up (they can be large) after local
# evaluation
__C.TEST.COMPETITION_MODE = True
# The optional test protocol for custom datasets
# Ignored by the VOC and COCO datasets
# Available protocols: 'voc2007', 'voc2010', 'coco'
__C.TEST.PROTOCOL = 'voc2007'
# Maximum number of detections to return per image (100 is based on the limit
# established for the COCO dataset)
__C.TEST.DETECTIONS_PER_IM = 100
###########################################
# #
# Model Options #
# #
###########################################
__C.MODEL = edict()
# The type of the model
# ('faster_rcnn',
# 'mask_rcnn',
# 'ssd',
# 'rssd',
# 'retinanet',
# )
__C.MODEL.TYPE = ''
# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE = 'FLOAT32'
# The backbone
__C.MODEL.BACKBONE = ''
# The number of classes in the dataset
__C.MODEL.NUM_CLASSES = -1
# Keep it for TaaS DataSet
__C.MODEL.CLASSES = ['__background__']
# Add StopGrad at a specified stage so the bottom layers are frozen
__C.MODEL.FREEZE_AT = 2
# Whether to use focal loss for one-stage detectors?
# Enabled if model type in ('ssd',)
# RetinaNet is forced to use focal loss
__C.MODEL.USE_FOCAL_LOSS = False
__C.MODEL.FOCAL_LOSS_ALPHA = 0.25
__C.MODEL.FOCAL_LOSS_GAMMA = 2.0
# Stride of the coarsest Feature level
# This is needed so the input can be padded properly
__C.MODEL.COARSEST_STRIDE = -1
###########################################
# #
# RPN Options #
# #
###########################################
__C.RPN = edict()
# Strides for multiple rpn heads
__C.RPN.STRIDES = [4, 8, 16, 32, 64]
# Scales for multiple anchors
__C.RPN.SCALES = [8, 8, 8, 8, 8]
# RPN anchor aspect ratios
__C.RPN.ASPECT_RATIOS = [0.5, 1, 2]
###########################################
# #
# Retina-Net Options #
# #
###########################################
__C.RETINANET = edict()
# Anchor aspect ratios to use
__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)
# Anchor scales per octave
__C.RETINANET.SCALES_PER_OCTAVE = 3
# At each FPN level, we generate anchors based on their scale, aspect_ratio,
# stride of the level, and we multiply the resulting anchor by ANCHOR_SCALE
__C.RETINANET.ANCHOR_SCALE = 4
# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
__C.RETINANET.NUM_CONVS = 4
# During inference, #locs to select based on cls score before NMS is performed
__C.RETINANET.PRE_NMS_TOP_N = 5000
# IoU overlap ratio for labeling an anchor as positive
# Anchors with >= iou overlap are labeled positive
__C.RETINANET.POSITIVE_OVERLAP = 0.5
# IoU overlap ratio for labeling an anchor as negative
# Anchors with < iou overlap are labeled negative
__C.RETINANET.NEGATIVE_OVERLAP = 0.4
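# A sketch of the anchor-size rule described above, assuming the standard
# RetinaNet formula (size = stride * ANCHOR_SCALE * 2**(i / SCALES_PER_OCTAVE));
# illustrative only, not consumed by this config.
def _retinanet_anchor_sizes(stride, anchor_scale=4, scales_per_octave=3):
    return [stride * anchor_scale * 2. ** (i / float(scales_per_octave))
            for i in range(scales_per_octave)]
# e.g. _retinanet_anchor_sizes(8) -> [32.0, ~40.3, ~50.8]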
###########################################
# #
# FPN Options #
# #
###########################################
__C.FPN = edict()
# Channel dimension of the FPN feature levels
__C.FPN.DIM = 256
# Coarsest level of the FPN pyramid
__C.FPN.RPN_MAX_LEVEL = 6
# Finest level of the FPN pyramid
__C.FPN.RPN_MIN_LEVEL = 2
# Hyper-Parameters for the RoI-to-FPN level mapping heuristic
__C.FPN.ROI_CANONICAL_SCALE = 224
__C.FPN.ROI_CANONICAL_LEVEL = 4
# Coarsest level of the FPN pyramid
__C.FPN.ROI_MAX_LEVEL = 5
# Finest level of the FPN pyramid
__C.FPN.ROI_MIN_LEVEL = 2
###########################################
# #
# Fast R-CNN Options #
# #
###########################################
__C.FRCNN = edict()
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.FRCNN.ROI_XFORM_METHOD = 'RoIPool'
# Hidden layer dimension when using an MLP for the RoI box head
__C.FRCNN.MLP_HEAD_DIM = 1024
# RoI transform output resolution
# Note: some models may have constraints on what they can use, e.g. they use
# pretrained FC layers like in VGG16, and will ignore this option
__C.FRCNN.ROI_XFORM_RESOLUTION = 7
###########################################
# #
# Mask R-CNN Options #
# #
###########################################
__C.MRCNN = edict()
# Resolution of mask predictions
__C.MRCNN.RESOLUTION = 28
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign'
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.MRCNN.ROI_XFORM_RESOLUTION = 14
###########################################
# #
# SSD Options #
# #
###########################################
__C.SSD = edict()
# Whether to enable FPN enhancement?
__C.SSD.FPN_ON = False
__C.SSD.MULTIBOX = edict()
# MultiBox configs
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
__C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []
__C.SSD.OHEM = edict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
# The ratio used in hard example mining
__C.SSD.OHEM.NEG_POS_RATIO = 3.0
# Distort the image?
__C.SSD.DISTORT = edict()
__C.SSD.DISTORT.BRIGHTNESS_PROB = 0.5
__C.SSD.DISTORT.CONTRAST_PROB = 0.5
__C.SSD.DISTORT.SATURATION_PROB = 0.5
# Expand the image?
__C.SSD.EXPAND = edict()
__C.SSD.EXPAND.PROB = 0.5
__C.SSD.EXPAND.MAX_RATIO = 4.0
# Resize the image?
__C.SSD.RESIZE = edict()
__C.SSD.RESIZE.HEIGHT = 300
__C.SSD.RESIZE.WIDTH = 300
__C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
# Samplers
# Format as (min_scale, max_scale,
# min_aspect_ratio, max_aspect_ratio,
# min_jaccard_overlap, max_jaccard_overlap,
# max_trials, max_sample)
__C.SSD.SAMPLERS = [
(1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1, 1), # Entire image
(0.3, 1.0, 0.5, 2.0, 0.1, 1.0, 10, 1), # IoU >= 0.1
(0.3, 1.0, 0.5, 2.0, 0.3, 1.0, 10, 1), # IoU >= 0.3
(0.3, 1.0, 0.5, 2.0, 0.5, 1.0, 5, 1), # IoU >= 0.5
(0.3, 1.0, 0.5, 2.0, 0.7, 1.0, 5, 1), # IoU >= 0.7
(0.3, 1.0, 0.5, 2.0, 0.9, 1.0, 5, 1), # IoU >= 0.9
(0.3, 1.0, 0.5, 2.0, 0.0, 1.0, 1, 1), # Any patches
]
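# A reading aid for the sampler tuples above, mirroring the field order
# documented in the comment; illustrative only.
import collections as _collections
_Sampler = _collections.namedtuple('_Sampler', [
    'min_scale', 'max_scale', 'min_aspect_ratio', 'max_aspect_ratio',
    'min_jaccard_overlap', 'max_jaccard_overlap', 'max_trials', 'max_sample'])
# e.g. _Sampler(*__C.SSD.SAMPLERS[1]).min_jaccard_overlap == 0.1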
###########################################
# #
# ResNet Options #
# #
###########################################
__C.RESNET = edict()
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
__C.RESNET.NUM_GROUPS = 1
# Baseline width of each group
__C.RESNET.GROUP_WIDTH = 64
###########################################
# #
# DropBlock Options #
# #
###########################################
__C.DROPBLOCK = edict()
# Whether to use DropBlock for more regularization
__C.DROPBLOCK.DROP_ON = False
# Decrement for scheduling keep prob after each iteration
__C.DROPBLOCK.DECREMENT = 1e-6
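# A sketch of the linear keep_prob schedule implied by the comment above;
# 'start' and 'floor' are assumptions, not options in this config
# (cf. the YAML note "0.000005 * 20000 = 0.1").
def _dropblock_keep_prob(iter_, decrement=1e-6, start=1.0, floor=0.9):
    return max(floor, start - decrement * iter_)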
###########################################
# #
# Solver Options #
# #
###########################################
__C.SOLVER = edict()
# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001
# Optional scaling factor for total loss
# This option is helpful to scale the magnitude
# of gradients during FP16 training
__C.SOLVER.LOSS_SCALING = 1.
# Schedule type (see functions in utils.lr_policy for options)
# E.g., 'step', 'steps_with_decay', ...
__C.SOLVER.LR_POLICY = 'steps_with_decay'
# Hyperparameter used by the specified policy
# For 'step', the current LR is multiplied by SOLVER.GAMMA at each step
__C.SOLVER.GAMMA = 0.1
# Uniform step size for 'steps' policy
__C.SOLVER.STEP_SIZE = 30000
__C.SOLVER.STEPS = []
# Maximum number of SGD iterations
__C.SOLVER.MAX_ITERS = 40000
# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9
# L2 regularization hyper parameters
__C.SOLVER.WEIGHT_DECAY = 0.0005
# L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = -1.0
# Warm up to SOLVER.BASE_LR over this number of SGD iterations
__C.SOLVER.WARM_UP_ITERS = 500
# Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR
__C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0
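# A sketch of the linear warm-up the two options above describe; the actual
# policy lives in lib.utils.lr_policy, this merely mirrors the comments.
def _warm_up_lr(iter_, base_lr=0.001, warm_up_iters=500, factor=1.0 / 3.0):
    alpha = min(iter_ / float(warm_up_iters), 1.0)
    return base_lr * (factor * (1.0 - alpha) + alpha)
# _warm_up_lr(0) == base_lr * factor, _warm_up_lr(warm_up_iters) == base_lr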
# The steps for accumulating gradients
__C.SOLVER.ITER_SIZE = 1
# The interval to display logs
__C.SOLVER.DISPLAY = 20
# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_ITERS = 5000
# Prefix to yield the path: <prefix>_iters_XYZ.caffemodel
__C.SOLVER.SNAPSHOT_PREFIX = ''
###########################################
# #
# Misc Options #
# #
###########################################
# Number of GPUs to use (applies to both training and testing)
__C.NUM_GPUS = 1
# Use NCCL for all-reduce, otherwise use CUDA-aware MPI
__C.USE_NCCL = True
# Hosts for Inter-Machine communication
__C.HOSTS = []
# Pixel mean values (BGR order)
__C.PIXEL_MEANS = [102., 115., 122.]
# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
__C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
# Default weights on (dx, dy, dw, dh, da) for normalizing rbox regression targets
__C.RBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0, 10.0)
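# A hedged sketch of the conventional R-CNN encoding that BBOX_REG_WEIGHTS
# normalize; boxes are (x1, y1, x2, y2). The project's own target code may
# differ in details such as the +1 box extents.
def _encode_bbox_target(roi, gt, weights=(10., 10., 5., 5.)):
    wx, wy, ww, wh = weights
    rw, rh = roi[2] - roi[0] + 1., roi[3] - roi[1] + 1.
    gw, gh = gt[2] - gt[0] + 1., gt[3] - gt[1] + 1.
    dx = wx * ((gt[0] + 0.5 * gw) - (roi[0] + 0.5 * rw)) / rw
    dy = wy * ((gt[1] + 0.5 * gh) - (roi[1] + 0.5 * rh)) / rh
    dw = ww * np.log(gw / rw)
    dh = wh * np.log(gh / rh)
    return dx, dy, dw, dh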
# Prior prob for the positives at the beginning of training.
# This is used to set the bias init for the logits layer
__C.PRIOR_PROB = 0.01
# For reproducibility
__C.RNG_SEED = 3
# Root directory of project
__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
# Data directory
__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))
# Place outputs under an experiments directory
__C.EXP_DIR = ''
# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True
# Default GPU device id
__C.GPU_ID = 0
# Dump detection visualizations
__C.VIS = False
__C.VIS_ON_FILE = False
# Score threshold for visualization
__C.VIS_TH = 0.7
# Write summaries by tensor board
__C.ENABLE_TENSOR_BOARD = False
def _merge_a_into_b(a, b):
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
if not isinstance(a, dict):
return
for k, v in a.items():
# a must specify keys that are in b
if k not in b:
raise KeyError('{} is not a valid config key'.format(k))
# the types must match, too
v = _check_and_coerce_cfg_value_type(v, b[k], k)
# recursively merge dicts
if type(v) is edict:
try:
_merge_a_into_b(a[k], b[k])
except:
print('Error under config key: {}'.format(k))
raise
else:
b[k] = v
def cfg_from_file(filename):
"""Load a config file and merge it into the default options."""
import yaml
with open(filename, 'r') as f:
        yaml_cfg = edict(yaml.safe_load(f))
global __C
_merge_a_into_b(yaml_cfg, __C)
def cfg_from_list(cfg_list):
"""Set config keys via list (e.g., from command line)."""
from ast import literal_eval
assert len(cfg_list) % 2 == 0
for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
key_list = k.split('.')
d = __C
for subkey in key_list[:-1]:
            assert subkey in d  # dict.has_key() was removed in Python 3
d = d[subkey]
subkey = key_list[-1]
assert subkey in d
try:
value = literal_eval(v)
except:
# Handle the case when v is a string literal
value = v
assert type(value) == type(d[subkey]), \
'type {} does not match original type {}'\
.format(type(value), type(d[subkey]))
d[subkey] = value
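# A minimal usage sketch (keys must already exist in the defaults above):
#   cfg_from_list(['TRAIN.IMS_PER_BATCH', '2', 'MODEL.NUM_CLASSES', '21'])
#   assert cfg.TRAIN.IMS_PER_BATCH == 2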
def _check_and_coerce_cfg_value_type(value_a, value_b, key):
"""Checks that `value_a`, which is intended to replace `value_b` is of the
right type. The type is correct if it matches exactly or is one of a few
cases in which the type can be easily coerced.
"""
# The types must match (with some exceptions)
type_b = type(value_b)
type_a = type(value_a)
if type_a is type_b:
return value_a
if type_b is float and type_a is int:
return float(value_a)
# Exceptions: numpy arrays, strings, tuple<->list
if isinstance(value_b, np.ndarray):
value_a = np.array(value_a, dtype=value_b.dtype)
elif isinstance(value_a, tuple) and isinstance(value_b, list):
value_a = list(value_a)
elif isinstance(value_a, list) and isinstance(value_b, tuple):
value_a = tuple(value_a)
elif isinstance(value_a, dict) and isinstance(value_b, edict):
value_a = edict(value_a)
else:
raise ValueError(
'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
'key: {}'.format(type_b, type_a, value_b, value_a, key)
)
return value_a
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
import time
import numpy as np
from lib.core.config import cfg
from lib.core.config import cfg_from_file
class Coordinator(object):
"""Coordinator is a simple tool to manage the
unique experiments from the YAML configurations.
"""
def __init__(self, cfg_file, exp_dir=None):
# Override the default configs
cfg_from_file(cfg_file)
if cfg.EXP_DIR != '':
exp_dir = cfg.EXP_DIR
if exp_dir is None:
model_id = time.strftime(
'%Y%m%d_%H%M%S', time.localtime(time.time()))
self.experiment_dir = '../experiments/{}'.format(model_id)
if not os.path.exists(self.experiment_dir):
os.makedirs(self.experiment_dir)
else:
if not os.path.exists(exp_dir):
raise ValueError('ExperimentDir({}) does not exist.'.format(exp_dir))
self.experiment_dir = exp_dir
def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path):
os.makedirs(path)
return path
def checkpoints_dir(self):
return self._path_at('checkpoints')
def exports_dir(self):
return self._path_at('exports')
def results_dir(self, checkpoint=None):
sub_dir = os.path.splitext(os.path.basename(checkpoint))[0] if checkpoint else ''
return self._path_at(os.path.join('results', sub_dir))
def checkpoint(self, global_step=None, wait=True):
def locate():
files = os.listdir(self.checkpoints_dir())
steps = []
for ix, file in enumerate(files):
step = int(file.split('_iter_')[-1].split('.')[0])
if global_step == step:
return os.path.join(self.checkpoints_dir(), files[ix]), step
steps.append(step)
if global_step is None:
if len(files) == 0:
return None, 0
last_idx = int(np.argmax(steps))
last_step = steps[last_idx]
return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step
return None, 0
result = locate()
while result[0] is None and wait:
print('\rWaiting for step_{}.checkpoint to exist...'.format(global_step), end='')
time.sleep(10)
result = locate()
return result
def delete_experiment(self):
if os.path.exists(self.experiment_dir):
shutil.rmtree(self.experiment_dir)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
import time
import numpy as np
from lib.core.config import cfg
from lib.core.config import cfg_from_file
class Coordinator(object):
"""Coordinator is a simple tool to manage the
unique experiments from the YAML configurations.
"""
def __init__(self, cfg_file, exp_dir=None):
# Override the default configs
cfg_from_file(cfg_file)
if cfg.EXP_DIR != '':
exp_dir = cfg.EXP_DIR
if exp_dir is None:
model_id = time.strftime(
'%Y%m%d_%H%M%S', time.localtime(time.time()))
self.experiment_dir = '../experiments/{}'.format(model_id)
if not os.path.exists(self.experiment_dir):
os.makedirs(self.experiment_dir)
else:
if not os.path.exists(exp_dir):
raise ValueError('ExperimentDir({}) does not exist.'.format(exp_dir))
self.experiment_dir = exp_dir
def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path):
os.makedirs(path)
return path
def checkpoints_dir(self):
return self._path_at('checkpoints')
def exports_dir(self):
return self._path_at('exports')
def results_dir(self, checkpoint=None):
sub_dir = os.path.splitext(os.path.basename(checkpoint))[0] if checkpoint else ''
return self._path_at(os.path.join('results', sub_dir))
def checkpoint(self, global_step=None, wait=True):
def locate():
files = os.listdir(self.checkpoints_dir())
steps = []
for ix, file in enumerate(files):
step = int(file.split('_iter_')[-1].split('.')[0])
if global_step == step:
return os.path.join(self.checkpoints_dir(), files[ix]), step
steps.append(step)
if global_step is None:
if len(files) == 0:
return None, 0
last_idx = int(np.argmax(steps))
last_step = steps[last_idx]
return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step
return None, 0
result = locate()
while result[0] is None and wait:
print('\rWaiting for step_{}.checkpoint to exist...'.format(global_step), end='')
time.sleep(10)
result = locate()
return result
def delete_experiment(self):
if os.path.exists(self.experiment_dir):
shutil.rmtree(self.experiment_dir)
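# A minimal usage sketch (the config path is a placeholder):
#   coordinator = Coordinator('configs/faster_rcnn.yml')
#   last_checkpoint, last_step = coordinator.checkpoint(wait=False)
#   results_dir = coordinator.results_dir(last_checkpoint)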
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import cv2
from multiprocessing import Queue
from collections import OrderedDict
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
# All detectors share the same reader/transformer during testing
from lib.faster_rcnn.data.data_reader import DataReader
from lib.faster_rcnn.data.data_transformer import DataTransformer
class TestServer(object):
def __init__(self, output_dir):
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.num_images, self.num_classes, self.classes = \
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = DataReader(**{'source': self.imdb.source})
self.data_transformer = DataTransformer()
self.data_reader.q_out = Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.start()
self.gt_recs = OrderedDict()
self.output_dir = output_dir
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
serialized = self.data_reader.q_out.get()
image = self.data_transformer.get_image(serialized)
image_id, objects = self.data_transformer.get_annotations(serialized)
self.gt_recs[image_id] = {
'objects': objects,
'width': image.shape[1],
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
                'Loaded {} records, but {} are required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes, self.get_records(), self.output_dir)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes, all_masks, self.get_records(), self.output_dir)
class InferServer(object):
def __init__(self, output_dir):
self.images_dir = cfg.TEST.DATABASE
self.imdb = get_imdb('taas:/empty')
self.images = os.listdir(self.images_dir)
self.num_images, self.num_classes, self.classes = \
len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES
self.data_transformer = DataTransformer()
self.gt_recs = OrderedDict()
self.output_dir = output_dir
self.image_idx = 0
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
image_name = self.images[self.image_idx]
image_id = os.path.splitext(image_name)[0]
image = cv2.imread(os.path.join(self.images_dir, image_name))
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = {
'width': image.shape[1],
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
'Loaded {} records, but {} are required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import multiprocessing as mp
import os
import cv2
import dragon
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data_transformer import DataTransformer
class TestServer(object):
def __init__(self, output_dir):
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.num_images, self.num_classes, self.classes = \
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_transformer = DataTransformer()
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
self.output_dir = output_dir
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
example = self.data_reader.q_out.get()
image = self.data_transformer.get_image(example)
image_id, objects = self.data_transformer.get_annotations(example)
self.gt_recs[image_id] = {
'objects': objects,
'width': image.shape[1],
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
'Loaded {} records, but {} are required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
class InferServer(object):
def __init__(self, output_dir):
self.images_dir = cfg.TEST.DATABASE
self.imdb = get_imdb('taas:/empty')
self.images = os.listdir(self.images_dir)
self.num_images, self.num_classes, self.classes = \
len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES
self.data_transformer = DataTransformer()
self.gt_recs = collections.OrderedDict()
self.output_dir = output_dir
self.image_idx = 0
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
image_name = self.images[self.image_idx]
image_id = os.path.splitext(image_name)[0]
image = cv2.imread(os.path.join(self.images_dir, image_name))
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = {'width': image.shape[1], 'height': image.shape[0]}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
'Loaded {} records, but {} are required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/train.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import datetime
import os
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.core.solver import get_solver_func
from lib.utils import logger
from lib.utils.stats import SmoothedValue
from lib.utils.timer import Timer
class SolverWrapper(object):
def __init__(self, coordinator):
self.output_dir = coordinator.checkpoints_dir()
self.solver = get_solver_func('MomentumSGD')()
# Load the pre-trained weights
init_weights = cfg.TRAIN.WEIGHTS
if init_weights != '':
if os.path.exists(init_weights):
logger.info('Loading weights from {}.'.format(init_weights))
self.solver.detector.load_weights(init_weights)
else:
raise ValueError('Invalid weights path: {}'.format(init_weights))
# Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16':
self.solver.detector.half()  # Cast the detector weights to FP16
self.solver.detector.cuda(cfg.GPU_ID)
# Plan the metrics
self.metrics = collections.OrderedDict()
if cfg.ENABLE_TENSOR_BOARD:
from dragon.tools.tensorboard import TensorBoard
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs')
def snapshot(self):
if not logger.is_root():
return None
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth')
filename = os.path.join(self.output_dir, filename)
torch.save(self.solver.detector.state_dict(), filename)
logger.info('Wrote snapshot to: {:s}'.format(filename))
return filename
def add_metrics(self, stats):
for k, v in stats['loss'].items():
if k not in self.metrics:
self.metrics[k] = SmoothedValue(20)
self.metrics[k].AddValue(v)
def send_metrics(self, stats):
if hasattr(self, 'board'):
self.board.scalar_summary('lr', stats['lr'], stats['iter'])
self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items():
if k == 'total':
self.board.scalar_summary(
'total_loss',
v.GetMedianValue(),
stats['iter'],
)
else:
self.board.scalar_summary(
k,
v.GetMedianValue(),
stats['iter'],
)
def step(self, display=False):
stats = self.solver.one_step()
self.add_metrics(stats)
self.send_metrics(stats)
if display:
logger.info(
'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (
stats['iter'], stats['lr'],
self.metrics['total'].GetMedianValue(),
stats['time'],
)
)
for k, v in self.metrics.items():
if k == 'total':
continue
logger.info(' ' * 10 + 'Train net output ({}): {}'
.format(k, v.GetMedianValue()))
def train_model(self):
"""Network training loop."""
last_snapshot_iter = -1
timer = Timer()
model_paths = []
start_lr = self.solver.base_lr
while self.solver.iter < cfg.SOLVER.MAX_ITERS:
if self.solver.iter < cfg.SOLVER.WARM_UP_ITERS:
alpha = (self.solver.iter + 1.0) / cfg.SOLVER.WARM_UP_ITERS
self.solver.base_lr = \
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
# Apply 1-step SGD update
with timer.tic_and_toc():
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0)
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time
eta_seconds = average_time * (
cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
logger.info(
'< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta)
)
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0:
last_snapshot_iter = self.solver.iter
model_paths.append(self.snapshot())
if last_snapshot_iter != self.solver.iter:
model_paths.append(self.snapshot())
return model_paths
def train_net(coordinator, start_iter=0):
sw = SolverWrapper(coordinator)
sw.solver.iter = start_iter
logger.info('Solving...')
model_paths = sw.train_model()
return model_paths
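# A worked sketch of the linear warm-up in train_model() above (hedged:
# the numeric values are illustrative, not this repository's defaults).
def warm_up_lr(base_lr, it, warm_up_iters=500, warm_up_factor=1. / 3.):
    if it >= warm_up_iters:
        return base_lr
    alpha = (it + 1.) / warm_up_iters
    # Interpolate from base_lr * warm_up_factor towards base_lr
    return base_lr * (warm_up_factor * (1 - alpha) + alpha)

# warm_up_lr(0.02, 0) ~= 0.0067, warm_up_lr(0.02, 499) == 0.02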
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/factory.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from lib.datasets.taas import TaaS
# TaaS DataSet
_GLOBAL_DATA_SETS = {'taas': lambda source: TaaS(source)}
def get_imdb(name):
"""Get an imdb (image database) by name."""
keys = name.split(':')
if len(keys) >= 2:
cls, source = keys[0], ':'.join(keys[1:])
if cls not in _GLOBAL_DATA_SETS:
raise KeyError('Unknown DataSet: {}'.format(cls))
return _GLOBAL_DATA_SETS[cls](source)
elif os.path.exists(name):
return _GLOBAL_DATA_SETS['taas'](name)
else:
raise ValueError('Illegal database: {}'.format(name))
def list_imdbs():
"""List all registered imdbs."""
return _GLOBAL_DATA_SETS.keys()
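# Usage sketch for the factory above (the dataset path is a placeholder):
# both forms resolve to a TaaS dataset, via the explicit 'taas:' prefix
# or a bare path that exists on disk.
if __name__ == '__main__':
    dataset = get_imdb('taas:/data/voc_0712_trainval')
    print(dataset.name, dataset.num_classes)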
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/imdb.py>
#
# ------------------------------------------------------------
import os
from dragon.tools.db import LMDB
from lib.core.config import cfg
class imdb(object):
def __init__(self, name):
self._name = name
self._num_classes = 0
self._classes = []
@property
def name(self):
return self._name
@property
def num_classes(self):
return len(self._classes)
@property
def classes(self):
return self._classes
@property
def cache_path(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
@property
def source(self):
expected_source = os.path.join(self.cache_path, self.name + '_lmdb')
if not os.path.exists(expected_source):
    raise RuntimeError('Expected LMDB source at: {}, '
                       'but it does not exist.'.format(expected_source))
return expected_source
@property
def num_images(self):
self._db = LMDB()
self._db.open(self.source)
num_entries = self._db.num_entries()
self._db.close()
return num_entries
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
pass
def evaluate_masks(self, all_boxes, all_masks, output_dir):
pass
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/imdb.py>
#
# ------------------------------------------------------------
import os
import dragon
from lib.core.config import cfg
class imdb(object):
def __init__(self, name):
self._name = name
self._num_classes = 0
self._classes = []
@property
def name(self):
return self._name
@property
def num_classes(self):
return len(self._classes)
@property
def classes(self):
return self._classes
@property
def cache_path(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
@property
def source(self):
expected_source = os.path.join(self.cache_path, self.name)
if not os.path.exists(expected_source):
    raise RuntimeError(
        'Expected source at: {}, '
        'but it does not exist.'
        .format(expected_source)
    )
return expected_source
@property
def num_images(self):
return dragon.io.SeetaRecordDataset(self.source).size
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
pass
def evaluate_masks(self, all_boxes, all_masks, output_dir):
pass
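# A minimal sketch of what 'num_images' above relies on (assuming only the
# dragon.io API already used in this file; the dataset path is a
# placeholder): the record dataset reports its size without decoding any
# image data.
if __name__ == '__main__':
    dataset = dragon.io.SeetaRecordDataset('/data/voc_0712_trainval')
    print(dataset.size)  # number of serialized examples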
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import json
import numpy as np
import uuid
import cv2
try:
import cPickle
except ImportError:
import pickle as cPickle
from .imdb import imdb
from .voc_eval import voc_bbox_eval, voc_segm_eval
from lib.core.config import cfg
from lib.utils import boxes as box_utils
from lib.pycocotools.mask import encode as encode_masks
class TaaS(imdb):
def __init__(self, source):
imdb.__init__(self, 'taas')
self._classes = cfg.MODEL.CLASSES
self._source = source
self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
self._class_to_cat_id = self._class_to_ind
self._salt = str(uuid.uuid4())
self.config = {'cleanup': True, 'use_salt': True}
@property
def source(self):
expected_source = self._source
if not os.path.exists(expected_source):
    raise RuntimeError('Expected LMDB source at: {}, '
                       'but it does not exist.'.format(expected_source))
return expected_source
##############################################
# #
# UTILS #
# #
##############################################
def _get_comp_id(self):
return '_' + self._salt if self.config['use_salt'] else ''
@classmethod
def _get_prefix(cls, type='bbox'):
if type == 'bbox':
return 'detections_'
elif type == 'segm':
return 'segmentations_'
elif type == 'kpt':
return 'keypoints_'
return ''
def _get_voc_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>_aeroplane.txt
if type == 'bbox':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.txt'
elif type == 'segm':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl'
else:
raise ValueError('Type of results can be either bbox or segm.')
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_annotations_T(self, results_folder, type='bbox'):
# experiments/model_id/results/[GT]_detections_taas.json
filename = '[GT]_' + self._get_prefix(type) + self._name + '.json'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>.json
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
##############################################
# #
# VOC #
# #
##############################################
def _write_xml_bbox_results(self, all_boxes, gt_recs, output_dir):
from xml.dom import minidom
import xml.etree.ElementTree as ET
ix = 0
for image_id, rec in gt_recs.items():
root = ET.Element('annotation')
ET.SubElement(root, 'filename').text = str(image_id)
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][ix]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
obj_elem = ET.SubElement(root, 'object')  # avoid shadowing builtin 'object'
ET.SubElement(obj_elem, 'name').text = cls
ET.SubElement(obj_elem, 'difficult').text = '0'
bnd_box = ET.SubElement(obj_elem, 'bndbox')
ET.SubElement(bnd_box, 'xmin').text = str(detections[k][0])
ET.SubElement(bnd_box, 'ymin').text = str(detections[k][1])
ET.SubElement(bnd_box, 'xmax').text = str(detections[k][2])
ET.SubElement(bnd_box, 'ymax').text = str(detections[k][3])
ix += 1
raw_text = ET.tostring(root)
dom = minidom.parseString(raw_text)
with open('{}/{}.xml'.format(output_dir, image_id), 'w') as f:
dom.writexml(f, "", "\t", "\n", "utf-8")
def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Writing {} VOC format bbox results'.format(cls))
filename = self._get_voc_results_T(output_dir).format(cls)
with open(filename, 'wt') as f:
ix = 0
for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]
ix += 1
if len(dets) == 0:
continue
for k in range(dets.shape[0]):
f.write(
'{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'
.format(image_id, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1,
dets[k, 2] + 1, dets[k, 3] + 1))
def _write_voc_segm_results(self, all_boxes, all_masks, output_dir):
for cls_inds, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
with open(bbox_filename, 'wb') as f:
cPickle.dump(all_boxes[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
with open(segm_filename, 'wb') as f:
cPickle.dump(all_masks[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
def _do_voc_bbox_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self._classes):
if cls == '__background__':
continue
det_file = self._get_voc_results_T(output_dir).format(cls)
rec, prec, ap = voc_bbox_eval(
det_file, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
def _do_voc_segm_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.classes):
if cls == '__background__':
continue
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
ap = voc_segm_eval(
bbox_filename, segm_filename, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
##############################################
# #
# COCO #
# #
##############################################
@classmethod
def _get_coco_image_id(cls, image_name):
image_id = image_name.split('_')[-1].split('.')[0]
try:
return int(image_id)
except ValueError:
return image_name
@classmethod
def _encode_coco_masks(cls, masks, boxes, im_h, im_w):
num_pred = len(boxes)
assert len(masks) == num_pred
mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
M = masks[0].shape[0]
scale = (M + 2.0) / M
ref_boxes = box_utils.expand_boxes(boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
for i in range(num_pred):
ref_box = ref_boxes[i, :4]
mask = masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > cfg.TEST.BINARY_THRESH, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
mask_image[y1:y2, x1:x2, i] = \
mask[(y1 - ref_box[1]):(y2 - ref_box[1]),
(x1 - ref_box[0]):(x2 - ref_box[0])]
return encode_masks(mask_image)
def _write_coco_bbox_annotations(self, gt_recs, output_dir):
# Build images
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='bbox')
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def _write_coco_segm_annotations(self, gt_recs, output_dir):
# Build images
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'segmentation': {
'size': [rec['height'], rec['width']],
'counts': obj['mask'],
},
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='segm')
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs):
ix, results = 0, []
for image_name, rec in gt_recs.items():
dets = boxes[ix]
ix += 1
if isinstance(dets, list) and len(dets) == 0:
continue
dets = dets.astype(np.float)
scores = dets[:, -1]
xs = dets[:, 0]
ys = dets[:, 1]
ws = dets[:, 2] - xs + 1
hs = dets[:, 3] - ys + 1
results.extend(
[{'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'bbox': [xs[k], ys[k], ws[k], hs[k]],
'score': scores[k],
} for k in range(dets.shape[0])]
)
return results
def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs):
def filter_boxes(dets):
boxes = dets[:, :4]
ws = boxes[:, 2] - boxes[:, 0]
hs = boxes[:, 3] - boxes[:, 1]
keep = np.where((ws >= 1) & (hs >= 1))[0]
return keep
results = []
ix = 0
for image_name, rec in gt_recs.items():
dets = boxes[ix].astype(np.float)
msks = masks[ix]
ix += 1
keep = filter_boxes(dets)
im_h, im_w = rec['height'], rec['width']
if len(keep) == 0:
continue
scores = dets[:, -1]
mask_encode = self._encode_coco_masks(
msks[keep], dets[keep, :4], im_h, im_w)
for k in range(dets[keep].shape[0]):
rle = mask_encode[k]
if sys.version_info >= (3, 0):
rle['counts'] = rle['counts'].decode()
results.append({
'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'segmentation': rle,
'score': scores[k],
})
return results
def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir)
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_bbox_results_one_category(
all_boxes[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir, type='segm')
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_segm_results_one_category(
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def _do_coco_bbox_eval(self, coco, res_file):
from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
coco_eval = COCOeval(coco, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _do_coco_segm_eval(self, coco, res_file):
from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
coco_eval = COCOeval(coco, coco_dt, 'segm')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _print_coco_eval_results(self, coco_eval):
IoU_lo_thresh = 0.5
IoU_hi_thresh = 0.95
def _get_thr_ind(coco_eval, thr):
ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
(coco_eval.params.iouThrs < thr + 1e-5))[0][0]
iou_thr = coco_eval.params.iouThrs[ind]
assert np.isclose(iou_thr, thr)
return ind
ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
# Precision has dims (iou, recall, cls, area range, max dets)
# Area range index 0: all area ranges
# Max dets index 2: 100 per image
precision = \
coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
ap_default = np.mean(precision[precision > -1])
print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
'~~~~'.format(IoU_lo_thresh, IoU_hi_thresh))
print('{:.1f}'.format(100 * ap_default))
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
# Minus 1 because of __background__
precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
ap = np.mean(precision[precision > -1])
print('{:.1f}'.format(100 * ap))
print('~~~~ Summary metrics ~~~~')
coco_eval.summarize()
##############################################
# #
# EVAL-API #
# #
##############################################
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
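# Protocols containing 'wo' (read: "without evaluation") only write
# result files and skip the metric computation below.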
if 'voc' in protocol:
self._write_voc_bbox_results(all_boxes, gt_recs, output_dir)
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.5,
use_07_metric='2007' in protocol)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol)
elif 'xml' in protocol:
if cfg.EXP_DIR != '':
output_dir = cfg.EXP_DIR
self._write_xml_bbox_results(all_boxes, gt_recs, output_dir)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
coco = COCO(cfg.TEST.JSON_FILE)
# Override the category ids with those from the JSON file before writing results
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(zip(
[c['name'] for c in cats], coco.getCatIds()))
else:
coco = None
res_file = self._write_coco_bbox_results(
all_boxes, gt_recs, output_dir)
if 'wo' not in protocol:
if coco is None:
ann_file = self._write_coco_bbox_annotations(gt_recs, output_dir)
coco = COCO(ann_file)
self._do_coco_bbox_eval(coco, res_file)
def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
self._write_voc_segm_results(all_boxes, all_masks, output_dir)
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_segm_eval(
gt_recs, output_dir, IoU=0.5,
use_07_metric='2007' in protocol)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
self._do_voc_segm_eval(
gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
coco = COCO(cfg.TEST.JSON_FILE)
# Override the category ids with those from the JSON file before writing results
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(
zip([c['name'] for c in cats], coco.getCatIds()))
else:
coco = None
res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir)
if 'wo' not in protocol:
if coco is None:
coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir))
self._do_coco_segm_eval(coco, res_file)
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import os
import sys
import uuid
import cv2
import numpy as np
try:
import cPickle
except ImportError:
import pickle as cPickle
from lib.core.config import cfg
from lib.datasets.imdb import imdb
from lib.datasets.voc_eval import voc_bbox_eval
from lib.datasets.voc_eval import voc_segm_eval
from lib.pycocotools.mask import encode as encode_masks
from lib.utils import boxes as box_utils
class TaaS(imdb):
def __init__(self, source):
imdb.__init__(self, 'taas')
self._classes = cfg.MODEL.CLASSES
self._source = source
self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
self._class_to_cat_id = self._class_to_ind
self._salt = str(uuid.uuid4())
self.config = {'cleanup': True, 'use_salt': True}
@property
def source(self):
expected_source = self._source
if not os.path.exists(expected_source):
    raise RuntimeError(
        'Expected source at: {}, '
        'but it does not exist.'
        .format(expected_source)
    )
return expected_source
##############################################
# #
# UTILS #
# #
##############################################
def _get_comp_id(self):
return '_' + self._salt if self.config['use_salt'] else ''
@classmethod
def _get_prefix(cls, type='bbox'):
if type == 'bbox':
return 'detections_'
elif type == 'segm':
return 'segmentations_'
elif type == 'kpt':
return 'keypoints_'
return ''
def _get_voc_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>_aeroplane.txt
if type == 'bbox':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.txt'
elif type == 'segm':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl'
else:
raise ValueError('Type of results can be either bbox or segm.')
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_annotations_T(self, results_folder, type='bbox'):
# experiments/model_id/results/[GT]_detections_taas.json
filename = '[GT]_' + self._get_prefix(type) + self._name + '.json'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>.json
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
##############################################
# #
# VOC #
# #
##############################################
def _write_xml_bbox_results(self, all_boxes, gt_recs, output_dir):
from xml.dom import minidom
import xml.etree.ElementTree as ET
ix = 0
for image_id, rec in gt_recs.items():
root = ET.Element('annotation')
ET.SubElement(root, 'filename').text = str(image_id)
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][ix]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
obj_elem = ET.SubElement(root, 'object')  # avoid shadowing builtin 'object'
ET.SubElement(obj_elem, 'name').text = cls
ET.SubElement(obj_elem, 'difficult').text = '0'
bnd_box = ET.SubElement(obj_elem, 'bndbox')
ET.SubElement(bnd_box, 'xmin').text = str(detections[k][0])
ET.SubElement(bnd_box, 'ymin').text = str(detections[k][1])
ET.SubElement(bnd_box, 'xmax').text = str(detections[k][2])
ET.SubElement(bnd_box, 'ymax').text = str(detections[k][3])
ix += 1
raw_text = ET.tostring(root)
dom = minidom.parseString(raw_text)
with open('{}/{}.xml'.format(output_dir, image_id), 'w') as f:
dom.writexml(f, "", "\t", "\n", "utf-8")
def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Writing {} VOC format bbox results'.format(cls))
filename = self._get_voc_results_T(output_dir).format(cls)
with open(filename, 'wt') as f:
ix = 0
for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]
ix += 1
if len(dets) == 0:
continue
for k in range(dets.shape[0]):
f.write(
'{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'
.format(image_id, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1,
dets[k, 2] + 1, dets[k, 3] + 1))
def _write_voc_segm_results(self, all_boxes, all_masks, output_dir):
for cls_inds, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
with open(bbox_filename, 'wb') as f:
cPickle.dump(all_boxes[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
with open(segm_filename, 'wb') as f:
cPickle.dump(all_masks[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
def _do_voc_bbox_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self._classes):
if cls == '__background__':
continue
det_file = self._get_voc_results_T(output_dir).format(cls)
rec, prec, ap = voc_bbox_eval(
det_file, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
def _do_voc_segm_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.classes):
if cls == '__background__':
continue
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
ap = voc_segm_eval(
bbox_filename, segm_filename, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
##############################################
# #
# COCO #
# #
##############################################
@classmethod
def _get_coco_image_id(cls, image_name):
image_id = image_name.split('_')[-1].split('.')[0]
try:
return int(image_id)
except ValueError:
return image_name
@classmethod
def _encode_coco_masks(cls, masks, boxes, im_h, im_w):
num_pred = len(boxes)
assert len(masks) == num_pred
mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
M = masks[0].shape[0]
scale = (M + 2.0) / M
ref_boxes = box_utils.expand_boxes(boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
for i in range(num_pred):
ref_box = ref_boxes[i, :4]
mask = masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > cfg.TEST.BINARY_THRESH, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
mask_image[y1:y2, x1:x2, i] = \
mask[(y1 - ref_box[1]):(y2 - ref_box[1]),
(x1 - ref_box[0]):(x2 - ref_box[0])]
return encode_masks(mask_image)
def _write_coco_bbox_annotations(self, gt_recs, output_dir):
# Build images
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='bbox')
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def _write_coco_segm_annotations(self, gt_recs, output_dir):
# Build images
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'segmentation': {
'size': [rec['height'], rec['width']],
'counts': obj['mask'],
},
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='segm')
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs):
ix, results = 0, []
for image_name, rec in gt_recs.items():
dets = boxes[ix]
ix += 1
if isinstance(dets, list) and len(dets) == 0:
continue
dets = dets.astype(np.float)
scores = dets[:, -1]
xs = dets[:, 0]
ys = dets[:, 1]
ws = dets[:, 2] - xs + 1
hs = dets[:, 3] - ys + 1
results.extend(
[{'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'bbox': [xs[k], ys[k], ws[k], hs[k]],
'score': scores[k],
} for k in range(dets.shape[0])]
)
return results
def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs):
def filter_boxes(dets):
boxes = dets[:, :4]
ws = boxes[:, 2] - boxes[:, 0]
hs = boxes[:, 3] - boxes[:, 1]
keep = np.where((ws >= 1) & (hs >= 1))[0]
return keep
results = []
ix = 0
for image_name, rec in gt_recs.items():
dets = boxes[ix].astype(np.float)
msks = masks[ix]
ix += 1
keep = filter_boxes(dets)
im_h, im_w = rec['height'], rec['width']
if len(keep) == 0:
continue
scores = dets[:, -1]
mask_encode = self._encode_coco_masks(
msks[keep], dets[keep, :4], im_h, im_w)
for k in range(dets[keep].shape[0]):
rle = mask_encode[k]
if sys.version_info >= (3, 0):
rle['counts'] = rle['counts'].decode()
results.append({
'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'segmentation': rle,
'score': scores[k],
})
return results
def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir)
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_bbox_results_one_category(
all_boxes[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir, type='segm')
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_segm_results_one_category(
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def _do_coco_bbox_eval(self, coco, res_file):
from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
coco_eval = COCOeval(coco, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _do_coco_segm_eval(self, coco, res_file):
from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
coco_eval = COCOeval(coco, coco_dt, 'segm')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _print_coco_eval_results(self, coco_eval):
IoU_lo_thresh = 0.5
IoU_hi_thresh = 0.95
def _get_thr_ind(coco_eval, thr):
ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
(coco_eval.params.iouThrs < thr + 1e-5))[0][0]
iou_thr = coco_eval.params.iouThrs[ind]
assert np.isclose(iou_thr, thr)
return ind
ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
# Precision has dims (iou, recall, cls, area range, max dets)
# Area range index 0: all area ranges
# Max dets index 2: 100 per image
precision = \
coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
ap_default = np.mean(precision[precision > -1])
print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
'~~~~'.format(IoU_lo_thresh, IoU_hi_thresh))
print('{:.1f}'.format(100 * ap_default))
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
# Minus 1 because of __background__
precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
ap = np.mean(precision[precision > -1])
print('{:.1f}'.format(100 * ap))
print('~~~~ Summary metrics ~~~~')
coco_eval.summarize()
##############################################
# #
# EVAL-API #
# #
##############################################
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
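# Protocols containing 'wo' (read: "without evaluation") only write
# result files and skip the metric computation below.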
if 'voc' in protocol:
self._write_voc_bbox_results(all_boxes, gt_recs, output_dir)
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.5,
use_07_metric='2007' in protocol)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol)
elif 'xml' in protocol:
if cfg.EXP_DIR != '':
output_dir = cfg.EXP_DIR
self._write_xml_bbox_results(all_boxes, gt_recs, output_dir)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
coco = COCO(cfg.TEST.JSON_FILE)
# Override the category ids with those from the JSON file before writing results
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(zip(
[c['name'] for c in cats], coco.getCatIds()))
else:
coco = None
res_file = self._write_coco_bbox_results(
all_boxes, gt_recs, output_dir)
if 'wo' not in protocol:
if coco is None:
ann_file = self._write_coco_bbox_annotations(gt_recs, output_dir)
coco = COCO(ann_file)
self._do_coco_bbox_eval(coco, res_file)
def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
self._write_voc_segm_results(all_boxes, all_masks, output_dir)
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_segm_eval(
gt_recs, output_dir, IoU=0.5,
use_07_metric='2007' in protocol)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
self._do_voc_segm_eval(
gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
coco = COCO(cfg.TEST.JSON_FILE)
# Override the category ids with those from the JSON file before writing results
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(
zip([c['name'] for c in cats], coco.getCatIds()))
else:
coco = None
res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir)
if 'wo' not in protocol:
if coco is None:
coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir))
self._do_coco_segm_eval(coco, res_file)
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle
except ImportError:
import pickle as cPickle
from lib.core.config import cfg
from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils.boxes import expand_boxes
from lib.utils.mask_transform import mask_overlap
def voc_ap(rec, prec, use_07_metric=False):
""" ap = voc_ap(rec, prec, [use_07_metric])
Compute VOC AP given precision and recall.
If use_07_metric is true, uses the
VOC 07 11 point method (default:False).
"""
if use_07_metric:
# 11 point metric
ap = 0.
for t in np.arange(0., 1.1, 0.1):
if np.sum(rec >= t) == 0:
p = 0
else:
p = np.max(prec[rec >= t])
ap = ap + p / 11.
else:
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
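# Worked example (toy values): for rec = [0.25, 0.5, 0.75, 1.0] and
# prec = [1.0, 1.0, 0.67, 0.5], the exact-area branch integrates the
# precision envelope: 0.5 * 1.0 + 0.25 * 0.67 + 0.25 * 0.5 = 0.7925,
# while the 11-point branch averages the max precision at recall
# thresholds 0.0, 0.1, ..., 1.0: (6 * 1.0 + 2 * 0.67 + 3 * 0.5) / 11 ~= 0.804.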
def voc_bbox_eval(
det_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
class_recs = {}
n_pos = 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
n_pos = n_pos + sum(~difficult)
class_recs[image_name] = {
'bbox': bbox,
'difficult': difficult,
'det': det
}
# Read detections
with open(det_file, 'r') as f:
lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# Avoid IndexError if detecting nothing
if len(BB) == 0:
return 0, 0, -1
# Sort by confidence
sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind]
# Go down detections and mark TPs and FPs
nd = len(image_ids)
tp, fp = np.zeros(nd), np.zeros(nd)
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
ovmax, jmax = -np.inf, 0
BBGT = R['bbox'].astype(float)
if BBGT.size > 0:
# Compute overlaps intersection
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin + 1., 0.)
ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih
# Union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
overlaps = inters / uni
ovmax = np.max(overlaps)
jmax = np.argmax(overlaps)
if ovmax > IoU:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[d] = 1.
R['det'][jmax] = 1
else:
fp[d] = 1.
else:
fp[d] = 1.
# compute precision recall
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(n_pos)
# avoid divide by zero in case the first detection matches a difficult
# ground truth
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric)
return rec, prec, ap
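# A small check of the inclusive-pixel IoU convention above (the '+1'
# terms treat boxes as closed pixel ranges): bb = [0, 0, 9, 9] and a
# ground truth [5, 0, 14, 9] are both 10 x 10 = 100 px; their
# intersection is 5 * 10 = 50 px, the union 100 + 100 - 50 = 150,
# so IoU = 1/3.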
def voc_segm_eval(
det_file,
seg_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
# 0. Constants
M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH
scale = (M + 2.0) / M
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
# 1. Get bbox & mask ground truths
image_names, class_recs, n_pos = [], {}, 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
n_pos = n_pos + sum(~difficult)
class_recs[image_name] = {
'bbox': bbox,
'mask': mask,
'difficult': difficult,
'det': det
}
image_names.append(image_name)
# 2. Get predict pickle file for this class
with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f)
# 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs)
box_num = 0
for im_i in range(num_images):
box_num += len(boxes_pkl[im_i])
    # Avoid IndexError if detecting nothing; return a single AP value,
    # consistent with the normal return path of this function
    if box_num == 0:
        return -1
# 4. Re-organize all the predicted boxes
new_boxes = np.zeros((box_num, 5))
new_masks = np.zeros((box_num, M, M))
new_images = []
cnt = 0
for image_ind in range(num_images):
boxes = boxes_pkl[image_ind]
masks = masks_pkl[image_ind]
num_instance = len(boxes)
for box_ind in range(num_instance):
new_boxes[cnt] = boxes[box_ind]
new_masks[cnt] = masks[box_ind]
new_images.append(image_names[image_ind])
cnt += 1
# 5. Rearrange boxes according to their scores
seg_scores = new_boxes[:, -1]
keep_inds = np.argsort(-seg_scores)
new_boxes = new_boxes[keep_inds, :]
new_masks = new_masks[keep_inds, :, :]
num_pred = new_boxes.shape[0]
# 6. Calculate t/f positive
fp = np.zeros((num_pred, 1))
tp = np.zeros((num_pred, 1))
ref_boxes = expand_boxes(new_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
for i in range(num_pred):
image_name = new_images[keep_inds[i]]
if image_name not in class_recs:
print('Warning: {} does not exist in the ground-truths.'.format(image_name))
fp[i] = 1
continue
R = class_recs[image_name]
im_h = gt_recs[image_name]['height']
im_w = gt_recs[image_name]['width']
# Decode mask
ref_box = ref_boxes[i, :4]
mask = new_masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > binary_thresh, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])]
# Calculate max region overlap
ovmax, jmax = -1, -1
for j in range(len(R['det'])):
gt_mask_bound = R['bbox'][j].astype(int)
pred_mask_bound = new_boxes[i, :4].astype(int)
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
if ov > ovmax:
ovmax = ov
jmax = j
if ovmax > IoU:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[i] = 1.
R['det'][jmax] = 1
else:
fp[i] = 1.
else:
fp[i] = 1
# 7. Calculate precision
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(n_pos)
# avoid divide by zero in case the first matches a difficult gt
prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric=use_07_metric)
return ap
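# Why scale = (M + 2) / M above: the M x M mask is zero-padded by one
# pixel per side (to M + 2) before cv2.resize, which keeps the bilinear
# interpolation from bleeding at the borders; the reference box must
# grow by the same factor to stay aligned. A sketch with toy numbers,
# assuming expand_boxes scales a box around its center:
if __name__ == '__main__':
    M = 4
    x1, y1, x2, y2 = 10., 10., 29., 29.      # a 20 x 20 pixel box
    ctr_x, ctr_y = (x1 + x2) / 2., (y1 + y2) / 2.
    half_w = (x2 - x1) / 2. * (M + 2.) / M   # grow by 1.5x when M = 4
    half_h = (y2 - y1) / 2. * (M + 2.) / M
    print([ctr_x - half_w, ctr_y - half_h, ctr_x + half_w, ctr_y + half_h])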
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle
except ImportError:
import pickle as cPickle
from lib.core.config import cfg
from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils.boxes import expand_boxes
from lib.utils.mask import mask_overlap
def voc_ap(rec, prec, use_07_metric=False):
""" ap = voc_ap(rec, prec, [use_07_metric])
Compute VOC AP given precision and recall.
If use_07_metric is true, uses the
    VOC 07 11-point method (default: False).
"""
if use_07_metric:
# 11 point metric
ap = 0.
for t in np.arange(0., 1.1, 0.1):
if np.sum(rec >= t) == 0:
p = 0
else:
p = np.max(prec[rec >= t])
ap = ap + p / 11.
else:
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
def voc_bbox_eval(
det_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
class_recs = {}
n_pos = 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
n_pos = n_pos + sum(~difficult)
class_recs[image_name] = {
'bbox': bbox,
'difficult': difficult,
'det': det
}
# Read detections
with open(det_file, 'r') as f:
lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# Avoid IndexError if detecting nothing
if len(BB) == 0:
return 0, 0, -1
# Sort by confidence
sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind]
# Go down detections and mark TPs and FPs
nd = len(image_ids)
tp, fp = np.zeros(nd), np.zeros(nd)
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
ovmax, jmax = -np.inf, 0
BBGT = R['bbox'].astype(float)
if BBGT.size > 0:
# Compute overlaps intersection
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin + 1., 0.)
ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih
# Union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
overlaps = inters / uni
ovmax = np.max(overlaps)
jmax = np.argmax(overlaps)
if ovmax > IoU:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[d] = 1.
R['det'][jmax] = 1
else:
fp[d] = 1.
else:
fp[d] = 1.
# compute precision recall
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(n_pos)
# avoid divide by zero in case the first detection matches a difficult
# ground truth
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric)
return rec, prec, ap
def voc_segm_eval(
det_file,
seg_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
# 0. Constants
M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH
scale = (M + 2.0) / M
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
# 1. Get bbox & mask ground truths
image_names, class_recs, n_pos = [], {}, 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
n_pos = n_pos + sum(~difficult)
class_recs[image_name] = {
'bbox': bbox,
'mask': mask,
'difficult': difficult,
'det': det
}
image_names.append(image_name)
# 2. Get predict pickle file for this class
with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f)
# 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs)
box_num = 0
for im_i in range(num_images):
box_num += len(boxes_pkl[im_i])
    # Avoid IndexError if detecting nothing; return a single AP value,
    # consistent with the normal return path of this function
    if box_num == 0:
        return -1
# 4. Re-organize all the predicted boxes
new_boxes = np.zeros((box_num, 5))
new_masks = np.zeros((box_num, M, M))
new_images = []
cnt = 0
for image_ind in range(num_images):
boxes = boxes_pkl[image_ind]
masks = masks_pkl[image_ind]
num_instance = len(boxes)
for box_ind in range(num_instance):
new_boxes[cnt] = boxes[box_ind]
new_masks[cnt] = masks[box_ind]
new_images.append(image_names[image_ind])
cnt += 1
# 5. Rearrange boxes according to their scores
seg_scores = new_boxes[:, -1]
keep_inds = np.argsort(-seg_scores)
new_boxes = new_boxes[keep_inds, :]
new_masks = new_masks[keep_inds, :, :]
num_pred = new_boxes.shape[0]
# 6. Calculate t/f positive
fp = np.zeros((num_pred, 1))
tp = np.zeros((num_pred, 1))
ref_boxes = expand_boxes(new_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
for i in range(num_pred):
image_name = new_images[keep_inds[i]]
if image_name not in class_recs:
print('Warning: {} does not exist in the ground-truths.'.format(image_name))
fp[i] = 1
continue
R = class_recs[image_name]
im_h = gt_recs[image_name]['height']
im_w = gt_recs[image_name]['width']
# Decode mask
ref_box = ref_boxes[i, :4]
mask = new_masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > binary_thresh, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])]
# Calculate max region overlap
ovmax, jmax = -1, -1
for j in range(len(R['det'])):
gt_mask_bound = R['bbox'][j].astype(int)
pred_mask_bound = new_boxes[i, :4].astype(int)
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
if ov > ovmax:
ovmax = ov
jmax = j
if ovmax > IoU:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[i] = 1.
R['det'][jmax] = 1
else:
fp[i] = 1.
else:
fp[i] = 1
# 7. Calculate precision
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(n_pos)
# avoid divide by zero in case the first matches a difficult gt
prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric=use_07_metric)
return ap
......@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.proposal_layer import ProposalLayer
from lib.faster_rcnn.layers.proposal_target_layer import ProposalTargetLayer
from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.data_layer import DataLayer
from lib.faster_rcnn.proposal_layer import ProposalLayer
from lib.faster_rcnn.proposal_target_layer import ProposalTargetLayer
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.faster_rcnn.generate_anchors import generate_anchors
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
# C4 backbone takes the first stride
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
height, width = features[0].shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
all_anchors = (self.base_anchors.reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
        # label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels first so that positive labels can clobber them
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels last so that negative labels can clobber positives
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], 0:4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
all_labels[ix, inds_inside] = labels # label
all_bbox_targets[ix, inds_inside] = bbox_targets
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
labels = all_labels \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors))
bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.faster_rcnn.generate_anchors import generate_anchors
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
# C4 backbone takes the first stride
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
height, width = features[0].shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
all_anchors = (self.base_anchors.reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
        # label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels first so that positive labels can clobber them
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels last so that negative labels can clobber positives
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
all_labels[ix, inds_inside] = labels # label
all_bbox_targets[ix, inds_inside] = bbox_targets
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
labels = all_labels \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors))
bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
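# A minimal sketch of the (1, A, 4) + (K, 1, 4) broadcast above: each of
# the A base anchors is replicated at each of the K feature-map cells,
# shifted by that cell's offset in image pixels. Sizes are illustrative.
if __name__ == '__main__':
    stride = 16
    base = np.array([[-8., -8., 8., 8.],           # A = 2 toy anchors
                     [-16., -16., 16., 16.]])
    h, w = 2, 3                                    # tiny feature map, K = 6
    sx, sy = np.meshgrid(np.arange(w) * stride, np.arange(h) * stride)
    shifts = np.vstack((sx.ravel(), sy.ravel(),
                        sx.ravel(), sy.ravel())).transpose()
    A, K = base.shape[0], shifts.shape[0]
    grid = (base.reshape((1, A, 4)) +
            shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    print(grid.reshape((K * A, 4)).shape)          # (12, 4) == (K * A, 4)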
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import im_list_to_blob
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self.q1_in = self.q2_in = self.q_out = None
self.daemon = True
def get(self, Q_in):
processed_ims, ims_info, all_boxes = [], [], []
for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = Q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[0:2]) + [im_scale])
# Encode boxes by adding the idx of images
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), dtype=np.float32)
im_boxes[:, 0:gt_boxes.shape[1]] = gt_boxes
im_boxes[:, -1] = ix
all_boxes.append(im_boxes)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, dtype=np.float32),
'gt_boxes': np.concatenate(all_boxes, axis=0),
}
def run(self):
while True:
if self.q1_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.q_out.put(self.get(self.q1_in))
elif self.q2_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.q_out.put(self.get(self.q2_in))
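# A toy illustration of the packing in get() above: every image's
# (N_i, 5) gt_boxes gain a trailing image-index column, so the whole
# batch concatenates into one (sum N_i, 6) blob that downstream layers
# can split again by that last column. Values are made up.
if __name__ == '__main__':
    batch_gt = [np.array([[10., 10., 50., 50., 1.]]),
                np.array([[5., 5., 20., 20., 2.],
                          [30., 30., 60., 60., 3.]])]
    packed = []
    for ix, gt_boxes in enumerate(batch_gt):
        im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
        im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, ix
        packed.append(im_boxes)
    print(np.concatenate(packed, axis=0)[:, -1])  # [0. 1. 1.]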
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import multiprocessing
import numpy
from dragon.tools import db
from lib.core.config import cfg
class DataReader(multiprocessing.Process):
"""Collect encoded str from `LMDB`_.
Partition and shuffle records over distributed nodes.
Parameters
----------
source : str
The path of database.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=2048
The number of chunks to split.
"""
def __init__(self, **kwargs):
"""Create a DataReader."""
super(DataReader, self).__init__()
self._source = kwargs.get('source', '')
self._use_shuffle = kwargs.get('shuffle', False)
self._num_chunks = kwargs.get('num_chunks', 2048)
self._part_idx, self._num_parts = 0, 1
self._cursor, self._chunk_cursor = 0, 0
self._chunk_size, self._perm_size = 0, 0
self._head, self._tail, self._num_entries = 0, 0, 0
self._db, self._zfill, self._perm = None, None, None
self._rng_seed = cfg.RNG_SEED
self.q_out = None
self.daemon = True
def element(self):
"""Get the value of current record.
Returns
-------
str
The encoded str.
"""
return self._db.value()
def redirect(self, target):
"""Redirect to the target position.
Parameters
----------
target : int
The key of the record.
Notes
-----
The redirection reopens the database.
You can drop caches by ``echo 3 > /proc/sys/vm/drop_caches``.
        This helps avoid getting stuck when *Database Size* >> *RAM Size*.
"""
self._db.close()
self._db.open(self._source)
self._cursor = target
self._db.set(str(target).zfill(self._zfill))
def reset(self):
"""Reset the cursor and environment."""
if self._num_parts > 1 or self._use_shuffle:
self._chunk_cursor = 0
self._part_idx = (self._part_idx + 1) % self._num_parts
if self._use_shuffle:
self._perm = numpy.random.permutation(self._perm_size)
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
            self._head = self._head * self._chunk_size
if self._head >= self._num_entries: self.next_chunk()
self._tail = self._head + self._chunk_size
self._tail = min(self._num_entries, self._tail)
else:
self._head, self._tail = 0, self._num_entries
self.redirect(self._head)
def next_record(self):
"""Step the cursor of records."""
self._db.next()
self._cursor += 1
def next_chunk(self):
"""Step the cursor of chunks."""
self._chunk_cursor += 1
if self._chunk_cursor >= self._perm_size:
self.reset()
else:
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._head = self._head * self._chunk_size
if self._head >= self._num_entries:
self.next_chunk()
else:
self._tail = self._head + self._chunk_size
self._tail = min(self._num_entries, self._tail)
self.redirect(self._head)
def run(self):
"""Start the process."""
# Fix seed
numpy.random.seed(self._rng_seed)
# Init db
self._db = db.LMDB()
self._db.open(self._source)
self._zfill = self._db.zfill()
self._num_entries = self._db.num_entries()
epoch_size = self._num_entries // self._num_parts + 1
if self._use_shuffle:
if self._num_chunks <= 0:
# Each chunk has at most 1 record (Record-Wise)
self._chunk_size, self._perm_size = 1, epoch_size
else:
                # Search for an optimal chunk size (Chunk-Wise)
min_size, max_size = \
1, self._db._total_size * 1.0 \
/ (self._num_chunks * (1 << 20))
while min_size * 2 < max_size: min_size *= 2
self._perm_size = int(math.ceil(
self._db._total_size * 1.1 /
(self._num_parts * min_size << 20)))
self._chunk_size = int(
self._num_entries * 1.0 /
(self._perm_size * self._num_parts) + 1)
limit = (self._num_parts - 0.5) * self._perm_size * self._chunk_size
if self._num_entries <= limit:
# Roll back to Record-Wise shuffle
self._chunk_size, self._perm_size = 1, epoch_size
else:
# One chunk has at most K records
self._chunk_size, self._perm_size = epoch_size, 1
self._perm = numpy.arange(self._perm_size)
# Init env
self.reset()
# Run!
while True:
self.q_out.put(self.element())
self.next_record()
if self._cursor >= self._tail:
if self._num_parts > 1 or self._use_shuffle:
self.next_chunk()
else:
self.reset()
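# A back-of-the-envelope sketch of the chunk partition driven above:
# each of num_parts nodes owns perm_size chunks of chunk_size records,
# shuffles its chunk order every epoch, and scans [head, tail) within a
# chunk. The numbers below are illustrative, not from a real database.
if __name__ == '__main__':
    num_entries, num_parts, chunk_size = 100, 2, 10
    perm_size = -(-num_entries // (num_parts * chunk_size))  # ceil -> 5
    part_idx = 1                               # this node's partition
    perm = numpy.random.permutation(perm_size)
    head = (part_idx * perm_size + perm[0]) * chunk_size
    tail = min(num_entries, head + chunk_size)
    print(head, tail)                          # a 10-record window on node 1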
......@@ -13,55 +13,70 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Queue
import multiprocessing as mp
import time
import dragon
import pprint
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.data.data_reader import DataReader
from lib.faster_rcnn.data.data_transformer import DataTransformer
from lib.faster_rcnn.data.blob_fetcher import BlobFetcher
from lib.faster_rcnn.data_transformer import DataTransformer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from lib.utils.blob import im_list_to_blob
class DataBatch(object):
"""DataBatch aims to prefetch data by ``Triple-Buffering``.
class DataLayer(torch.nn.Module):
"""Generate a mini-batch of data."""
    It takes full advantage of Python's processes and threads,
def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
    which provides a remarkable I/O speed-up for scalable distributed training.
"""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
source : str
The path of database.
dataset : lambda
The creator of a dataset.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=2048
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=128
batch_size : int, optional, default=2
The size of a mini-batch.
prefetch : int, optional, default=5
The prefetch count.
"""
super(DataBatch, self).__init__()
# Init mpi
global_rank, local_rank, group_size = 0, 0, 1
if dragon.mpi.is_init():
group = dragon.mpi.is_parallel()
if group is not None: # DataParallel
global_rank = dragon.mpi.rank()
group_size = len(group)
for i, node in enumerate(group):
if global_rank == node:
local_rank = i
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_default_process_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
......@@ -71,6 +86,7 @@ class DataBatch(object):
self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
        # I/O-aware policy
if self._num_transformers == -1:
......@@ -81,66 +97,52 @@ class DataBatch(object):
self._num_transformers = min(
self._num_transformers, self._max_transformers)
# Init queues
self.Q1 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q21 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q22 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q3 = Queue(self._prefetch * self._num_readers)
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Init readers
# Initialize readers
self._readers = []
for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += local_rank * self._num_readers
self._readers[i]._num_parts = num_parts
self._readers[i]._part_idx = part_idx
self._readers[i]._rng_seed += part_idx
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Init transformers
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer._rng_seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q1_out = self.Q21
transformer.q2_out = self.Q22
transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Init blob fetchers
self._fetchers = []
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.q1_in = self.Q21
fetcher.q2_in = self.Q22
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
        # Prevent echoing from multiple nodes
if local_rank == 0:
self.echo()
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate(self._fetchers)
logger.info('Terminating BlobFetcher ......')
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminating DataTransformer ......')
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminating DataReader......')
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
......@@ -156,20 +158,27 @@ class DataBatch(object):
"""
return self.Q3.get()
def echo(self):
"""Print I/O Information.
Returns
-------
None
"""
print('---------------------------------------------------------')
print('BatchFetcher({} Threads), Using config:'.format(
self._num_readers + self._num_transformers + self._num_fetchers))
params = {'queue_size': self._prefetch,
'n_readers': self._num_readers,
'n_transformers': self._num_transformers,
'n_fetchers': self._num_fetchers}
pprint.pprint(params)
print('---------------------------------------------------------')
def run(self):
"""Start the process to produce batches."""
def produce(q_in):
processed_ims, ims_info, all_boxes = [], [], []
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[:2]) + [im_scale])
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
all_boxes.append(im_boxes)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, dtype=np.float32),
'gt_boxes': np.concatenate(all_boxes, axis=0),
}
q1, q2 = self.Q21, self.Q22
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
            q1, q2 = q2, q1  # Swap the queues so both are drained evenly
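# Why two output queues: DataTransformer routes portrait images
# (aspect ratio > 1) to Q21 and landscape images to Q22, so each batch
# mixes only one orientation and im_list_to_blob pads less. A toy sketch
# of the padding saved, with made-up image shapes:
if __name__ == '__main__':
    portrait = [(800, 600), (790, 610)]
    landscape = [(600, 800), (610, 790)]
    pad_hw = lambda shapes: tuple(np.max(np.array(shapes), axis=0))
    print(pad_hw(portrait + landscape))         # (800, 800): mixed batch
    print(pad_hw(portrait), pad_hw(landscape))  # (800, 610) (610, 800)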
......@@ -14,22 +14,13 @@ from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
import numpy.random as npr
try:
import cv2
except ImportError as e:
print('Failed to import cv2. Error: {0}'.format(str(e)))
try:
import PIL.Image
except ImportError as e:
print('Failed to import PIL. Error: {0}'.format(str(e)))
import cv2
import numpy as np
from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.utils import logger
from lib.utils.blob import prep_im_for_blob
from lib.utils.boxes import flip_boxes
class DataTransformer(multiprocessing.Process):
......@@ -47,44 +38,45 @@ class DataTransformer(multiprocessing.Process):
def make_roi_dict(
self,
ann_datum,
example,
im_scale,
apply_flip=False,
offsets=None,
):
annotations = ann_datum.annotation
n_objects = 0
if not self._use_diff:
for ann in annotations:
if not ann.difficult:
for obj in example['object']:
if obj.get('difficult', 0) == 0:
n_objects += 1
else:
n_objects = len(annotations)
n_objects = len(example['object'])
roi_dict = {
'width': ann_datum.datum.width,
'height': ann_datum.datum.height,
'width': example['width'],
'height': example['height'],
'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), 'float32'),
}
# Filter the difficult instances
rec_idx = 0
for ann in annotations:
if not self._use_diff and ann.difficult:
object_idx = 0
for obj in example['object']:
if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue
roi_dict['boxes'][rec_idx, :] = [
max(0, ann.x1),
max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1),
roi_dict['boxes'][object_idx, :] = [
max(0, obj['xmin']),
max(0, obj['ymin']),
min(obj['xmax'], example['width'] - 1),
min(obj['ymax'], example['height'] - 1),
]
roi_dict['gt_classes'][rec_idx] = self._class_to_ind[ann.name]
rec_idx += 1
roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']]
object_idx += 1
# Flip the boxes if necessary
if apply_flip:
roi_dict['boxes'] = _flip_boxes(
roi_dict['boxes'] = flip_boxes(
roi_dict['boxes'], roi_dict['width'])
# Scale the boxes to the detecting scale
......@@ -102,50 +94,34 @@ class DataTransformer(multiprocessing.Process):
return roi_dict
@classmethod
def get_image(cls, serialized):
datum = pb.AnnotatedDatum()
datum.ParseFromString(serialized)
datum = datum.datum
im = np.fromstring(datum.data, np.uint8)
return cv2.imdecode(im, -1) if datum.encoded is True else \
im.reshape((datum.height, datum.width, datum.channels))
def get_image(cls, example):
img = np.frombuffer(example['content'], np.uint8)
return cv2.imdecode(img, -1)
@classmethod
def get_annotations(cls, serialized):
datum = pb.AnnotatedDatum()
datum.ParseFromString(serialized)
filename = datum.filename
annotations = datum.annotation
def get_annotations(cls, example):
objects = []
for ix, ann in enumerate(annotations):
for ix, obj in enumerate(example['object']):
objects.append({
'name': ann.name,
'difficult': int(ann.difficult),
'bbox': [ann.x1, ann.y1, ann.x2, ann.y2],
'mask': ann.mask,
'name': obj['name'],
'difficult': obj.get('difficult', 0),
'bbox': [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']],
})
return filename, objects
def get(self, serialized):
datum = pb.AnnotatedDatum()
datum.ParseFromString(serialized)
im_datum = datum.datum
im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True:
im = cv2.imdecode(im, -1)
else:
h, w = im_datum.height, im_datum.width
im = im.reshape((h, w, im_datum.channels))
return example['id'], objects
def get(self, example):
img = np.frombuffer(example['content'], np.uint8)
img = cv2.imdecode(img, -1)
# Scale
scale_indices = npr.randint(len(cfg.TRAIN.SCALES))
scale_indices = np.random.randint(len(cfg.TRAIN.SCALES))
target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE)
im, im_scale, jitter = prep_im_for_blob(img, target_size, cfg.TRAIN.MAX_SIZE)
# Flip
apply_flip = False
if self._use_flipped:
if npr.randint(0, 2) > 0:
if np.random.randint(2) > 0:
im = im[:, ::-1, :]
apply_flip = True
......@@ -160,8 +136,8 @@ class DataTransformer(multiprocessing.Process):
# To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im)
# Datum -> RoIDict
roi_dict = self.make_roi_dict(datum, im_scale, apply_flip, offsets)
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
......@@ -171,29 +147,16 @@ class DataTransformer(multiprocessing.Process):
return im, im_scale, gt_boxes
def run(self):
npr.seed(self._rng_seed)
np.random.seed(self._rng_seed)
while True:
serialized = self.q_in.get()
data = self.get(serialized)
            # Ensure that there is at least 1 ground-truth
if len(data[2]) < 1:
continue
aspect_ratio = float(data[0].shape[0]) / data[0].shape[1]
if aspect_ratio > 1.0:
self.q1_out.put(data)
outputs = self.get(self.q_in.get())
if len(outputs[2]) < 1:
continue # Ignore the non-object image
aspect_ratio = float(outputs[0].shape[0]) / outputs[0].shape[1]
if aspect_ratio > 1.:
self.q1_out.put(outputs)
else:
self.q2_out.put(data)
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
self.q2_out.put(outputs)
def _get_image_with_target_size(target_size, img):
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py>
#
# ------------------------------------------------------------
import numpy as np
# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# >> anchors
#
# anchors =
#
# -83 -39 100 56
# -175 -87 192 104
# -359 -183 376 200
# -55 -55 72 72
# -119 -119 136 136
# -247 -247 264 264
# -35 -79 52 96
# -79 -167 96 184
# -167 -343 184 360
# array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
def generate_anchors(
base_size=16,
ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6),
):
"""
Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, 15, 15) window.
"""
base_anchor = np.array([1, 1, base_size, base_size]) - 1
ratio_anchors = _ratio_enum(base_anchor, ratios)
anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
for i in range(ratio_anchors.shape[0])])
return anchors
def generate_anchors_v2(
stride=16,
ratios=(0.5, 1, 2),
sizes=(32, 64, 128, 256, 512),
):
"""
Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
are centered on stride / 2, have (approximate) sqrt areas of the specified
sizes, and aspect ratios as given.
"""
return generate_anchors(
base_size=stride,
ratios=ratios,
scales=np.array(sizes, dtype=np.float) / stride,
)
def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window)."""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
y_ctr = anchor[1] + 0.5 * (h - 1)
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
y_ctr + 0.5 * (hs - 1)))
return anchors
def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
ws = np.round(np.sqrt(size_ratios))
hs = np.round(ws * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _scale_enum(anchor, scales):
"""Enumerate a set of anchors for each scale wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
if __name__ == '__main__':
print(generate_anchors())
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# >> anchors
#
# anchors =
#
# -83 -39 100 56
# -175 -87 192 104
# -359 -183 376 200
# -55 -55 72 72
# -119 -119 136 136
# -247 -247 264 264
# -35 -79 52 96
# -79 -167 96 184
# -167 -343 184 360
# array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
def generate_anchors(
base_size=16,
ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6),
):
"""
Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, 15, 15) window.
"""
base_anchor = np.array([1, 1, base_size, base_size]) - 1
ratio_anchors = _ratio_enum(base_anchor, ratios)
anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
for i in range(ratio_anchors.shape[0])])
return anchors
def generate_anchors_v2(
stride=16,
ratios=(0.5, 1, 2),
sizes=(32, 64, 128, 256, 512),
):
"""
Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
are centered on stride / 2, have (approximate) sqrt areas of the specified
sizes, and aspect ratios as given.
"""
return generate_anchors(
base_size=stride,
ratios=ratios,
scales=np.array(sizes, dtype=np.float) / stride,
)
def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window)."""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
y_ctr = anchor[1] + 0.5 * (h - 1)
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
y_ctr + 0.5 * (hs - 1)))
return anchors
def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
ws = np.round(np.sqrt(size_ratios))
hs = np.round(ws * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _scale_enum(anchor, scales):
"""Enumerate a set of anchors for each scale wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
if __name__ == '__main__':
print(generate_anchors())
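# A quick consistency check, using only this module's definitions: the
# v2 helper reproduces the classic anchors whenever sizes / stride
# equals the classic scales (here 128/16, 256/16, 512/16 = 8, 16, 32).
if __name__ == '__main__':
    classic = generate_anchors()  # scales = 2 ** np.arange(3, 6)
    v2 = generate_anchors_v2(stride=16, sizes=(128, 256, 512))
    print(np.allclose(classic, v2))  # expected: True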
# --------------------------------------------------------
# Mask R-CNN @ Detectron
# Copyright (c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data.data_batch import DataBatch
class DataLayer(torch.nn.Module):
def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'source': database.source,
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': 0, # Record-Wise Shuffle
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
# Generate proposals from shifted anchors
height, width = cls_prob.shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
anchors = \
self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4))
# Prepare for the outputs
batch_rois = []
# scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
order = np.argsort(-scores.squeeze())
else:
                # Avoid sorting possibly large arrays; first partition to get the top K
                # unsorted, then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
return [blob_to_tensor(rpn_rois)]
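The pre-NMS top-K selection above avoids a full sort by first partitioning with np.argpartition and then sorting only the K survivors. A self-contained sketch of the same pattern, with toy scores and an assumed K of 2:
import numpy as np
scores = np.array([0.1, 0.9, 0.3, 0.7])
top_k = 2
# argpartition places the top-k entries (unordered) in the first k slots in O(n).
inds = np.argpartition(-scores, top_k)[:top_k]
# Sort only those k entries to get a descending-score order.
order = inds[np.argsort(-scores[inds])]
print(order)          # [1 3]
print(scores[order])  # [0.9 0.7]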
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets."""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return {
'rois': [blob_to_tensor(batch_outputs['rois'])],
'labels': blob_to_tensor(batch_outputs['labels']),
'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(
all_rois,
gt_boxes,
fg_rois_per_image,
rois_per_image,
num_classes,
):
"""Generate a random sample of RoIs."""
overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float),
)
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
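The compact-to-expanded conversion in _get_bbox_regression_labels above can be traced with a tiny standalone example, assuming num_classes = 3 and one foreground RoI of class 2 (toy target values); only columns 8..11 of the 4K-wide blob receive targets and unit inside-weights:
import numpy as np
num_classes = 3
# Compact form: one row per RoI, (class, tx, ty, tw, th).
bbox_target_data = np.array([[2.0, 0.1, -0.2, 0.3, 0.0],   # fg RoI, class 2
                             [0.0, 0.0, 0.0, 0.0, 0.0]])   # bg RoI, class 0
bbox_targets = np.zeros((2, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros_like(bbox_targets)
for ind in np.where(bbox_target_data[:, 0] > 0)[0]:
    start = int(4 * bbox_target_data[ind, 0])
    bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
    bbox_inside_weights[ind, start:start + 4] = 1.0
print(bbox_targets[0])         # non-zero only in columns 8..11
print(bbox_inside_weights[0])  # 1.0 only in columns 8..11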
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
blobs['data'] = torch.from_numpy(blobs['data'])
    # Forward pass
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
batch_rois = tensor_to_blob(outputs['rois'])
batch_scores = tensor_to_blob(outputs['cls_prob'])
batch_deltas = tensor_to_blob(outputs['bbox_pred'])
batch_boxes = bbox_transform_inv(
boxes=batch_rois[:, 1:5],
deltas=batch_deltas,
weights=cfg.BBOX_REG_WEIGHTS,
)
scores_wide, boxes_wide = [], []
for im_idx in range(len(ims)):
indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0]
boxes = batch_boxes[indices]
boxes /= ims_scale[im_idx]
clip_tiled_boxes(boxes, raw_image.shape)
scores_wide.append(batch_scores[indices])
boxes_wide.append(boxes)
return (np.vstack(scores_wide), np.vstack(boxes_wide)) \
if len(scores_wide) > 1 else (scores_wide[0], boxes_wide[0])
def test_net(detector, server):
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
for i in range(num_images):
image_id, raw_image = server.get_image()
_t['im_detect'].tic()
scores, boxes = im_detect(detector, raw_image)
_t['im_detect'].toc()
_t['misc'].tic()
boxes_this_image = [[]]
for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4]
cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
cls_detections, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(cls_detections, cfg.TEST.NMS, force_cpu=True)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_image, classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_id),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(i + 1, num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections')
server.evaluate_detections(all_boxes)
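The per-image cap in test_net above keeps the DETECTIONS_PER_IM highest-scoring boxes across all classes by thresholding at the N-th best score. A minimal sketch of that thresholding with toy per-class scores and an assumed cap of 3:
import numpy as np
detections_per_im = 3
# Hypothetical per-class score arrays for one image.
class_scores = [np.array([0.9, 0.4]), np.array([0.8, 0.6, 0.2])]
image_scores = np.hstack(class_scores)
if len(image_scores) > detections_per_im:
    # The N-th highest score becomes the keep threshold.
    image_thresh = np.sort(image_scores)[-detections_per_im]
    kept = [s[s >= image_thresh] for s in class_scores]
    print(image_thresh)  # 0.6
    print(kept)          # [array([0.9]), array([0.8, 0.6])]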
......@@ -13,6 +13,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.fpn.layers.anchor_target_layer import AnchorTargetLayer
from lib.fpn.layers.proposal_layer import ProposalLayer
from lib.fpn.layers.proposal_target_layer import ProposalTargetLayer
from lib.fpn.anchor_target_layer import AnchorTargetLayer
from lib.fpn.proposal_layer import ProposalLayer
from lib.fpn.proposal_target_layer import ProposalTargetLayer
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))
)
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors)
total_anchors += anchors.shape[0]
all_anchors = np.vstack(all_anchors)
# label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], 0:4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
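The labeling above marks an anchor positive when its best IoU clears RPN_POSITIVE_OVERLAP, negative when it falls below RPN_NEGATIVE_OVERLAP, and -1 (ignored) otherwise, then randomly disables surplus positives. A standalone sketch with toy overlaps and hypothetical thresholds of 0.7 and 0.3:
import numpy as np
import numpy.random as npr
pos_thresh, neg_thresh = 0.7, 0.3  # stand-ins for the cfg.TRAIN.RPN_*_OVERLAP values
max_overlaps = np.array([0.85, 0.5, 0.1, 0.72])  # best IoU per anchor
labels = np.full(max_overlaps.shape, -1.0)  # -1: don't care
labels[max_overlaps >= pos_thresh] = 1      # foreground
labels[max_overlaps < neg_thresh] = 0       # background
print(labels)  # [ 1. -1.  0.  1.]
# Randomly disable surplus positives, as the npr.choice calls above do.
num_fg = 1
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
    disable = npr.choice(fg_inds, size=len(fg_inds) - num_fg, replace=False)
    labels[disable] = -1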
# --------------------------------------------------------
# Mask R-CNN @ Detectron
# Copyright (c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))
)
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def generate_grid_anchors(self, features):
# Generate proposals from shifted anchors
anchors_wide = []
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
anchors_wide.append(anchors)
return np.vstack(anchors_wide)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
all_anchors = self.generate_grid_anchors(features) # [n, 4]
if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images:
logger.fatal('Incorrect num of images: {}'.format(num_images))
# Prepare for the outputs
batch_rois = []
batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4]
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
order = np.argsort(-scores.squeeze())
else:
                # Avoid sorting possibly large arrays; first partition to get
                # the top K unsorted, then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
            # 3. Remove predicted boxes with either height or width < threshold
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
            # 4. Apply nms (e.g. threshold = 0.7)
            # 5. Take post_nms_topN (e.g. 300)
            # 6. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level)
all_rois = []
for i in range(K):
lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling
all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else:
all_rois.append(blob_to_tensor(rpn_rois[lv_indices]))
return all_rois
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
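_map_rois_to_fpn_levels above implements the FPN paper heuristic k = floor(k0 + log2(sqrt(w * h) / s0)). A quick standalone check with three square RoIs whose scales are exactly 112, 224 and 448, assuming the default canonical scale 224, canonical level 4, and levels clipped to [2, 5]:
import numpy as np
# RoIs as (batch_ind, x1, y1, x2, y2); sizes chosen to hit scales
# of 112, 224 and 448 exactly (toy values).
rois = np.array([[0, 0, 0, 111, 111],
                 [0, 0, 0, 223, 223],
                 [0, 0, 0, 447, 447]], dtype=np.float32)
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)                           # [112, 224, 448]
levels = np.floor(4 + np.log2(s / 224 + 1e-6))
print(np.clip(levels, 2, 5))                   # [3. 4. 5.]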
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
self.fake_outputs = {
'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32),
'labels': np.array([-1], dtype=np.float32),
'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
}
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return {
'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Sample a batch of RoIs comprising foreground and background examples."""
# overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
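_fmap_rois above substitutes a pre-built fake entry whenever a pyramid level receives no RoIs, so the downstream RoI pooling never sees an empty blob. A minimal sketch of that substitution, with toy RoIs and hypothetical per-level index lists:
import numpy as np
rois = np.array([[0, 0, 0, 20, 20],
                 [0, 5, 5, 400, 400]], dtype=np.float32)
fake_roi = np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)  # batch_ind -1
# Hypothetical level assignment: level 0 gets both RoIs, level 1 gets none.
levels = [np.array([0, 1]), np.array([], dtype=np.int64)]
per_level = [rois[inds] if len(inds) > 0 else fake_roi for inds in levels]
print(per_level[0].shape)  # (2, 5)
print(per_level[1])        # [[-1.  0.  0.  1.  1.]]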
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
self.fake_outputs = {
'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32),
'labels': np.array([-1], dtype=np.float32),
'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
}
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
        outputs = {key: [] for key in keys}
        batch_outputs = {key: [] for key in keys}
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return {
'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
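# Output structure note (editorial): 'rois' is a K-element list with one
# blob per FPN level, while 'labels' and the bbox blobs are concatenated
# across levels in the same level-major order, e.g. for K=4:
#     rois:   [P2_rois, P3_rois, P4_rois, P5_rois]
#     labels: concat(P2_labels, P3_labels, P4_labels, P5_labels)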
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Import custom modules
from lib.modeling.base import affine
from lib.modeling.base import bn
from lib.modeling.base import conv1x1
from lib.modeling.base import conv3x3
from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN
from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class WideResBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out, stride=1, downsample=None):
super(WideResBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out)
self.downsample = downsample
self.relu = torch.nn.ReLU(inplace=True)
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(residual)
out += residual
out = self.relu(out)
return out
class InceptionBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out):
super(InceptionBlock, self).__init__()
self.conv1 = conv1x1(dim_in, dim_out)
self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out // 2)
self.bn2 = affine(dim_out // 2)
self.conv3a = conv3x3(dim_out // 2, dim_out)
self.bn3a = affine(dim_out)
self.conv3b = conv3x3(dim_out, dim_out)
self.bn3b = affine(dim_out)
self.conv4 = conv3x3(dim_out * 3, dim_out)
self.bn4 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
def forward(self, x):
residual = x
out = self.conv1(x)
out_1x1 = self.bn1(out)
out_1x1 = self.relu(out_1x1)
out = self.conv2(out_1x1)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3a(out)
out_3x3_a = self.bn3a(out)
out_3x3_a = self.relu(out_3x3_a)
out = self.conv3b(out_1x1)
out_3x3_b = self.bn3b(out)
out_3x3_b = self.relu(out_3x3_b)
out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], dim=1)
out = self.conv4(out)
out = self.bn4(out)
out += residual
out = self.relu(out)
return out
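# Channel bookkeeping for the block above (editorial check): the 1x1 branch,
# the stacked 3x3 branch (conv2 -> conv3a), and the single 3x3 branch (conv3b
# applied to the 1x1 output) each yield dim_out channels, which is why conv4
# expects dim_out * 3 input channels.
import numpy as np

_branches = [np.zeros((1, 64, 8, 8))] * 3   # three dim_out-channel branches
assert np.concatenate(_branches, axis=1).shape[1] == 64 * 3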
class AirNet(torch.nn.Module):
def __init__(self, blocks, num_stages):
super(AirNet, self).__init__()
self.dim_in, filters = 64, [64, 128, 256, 384]
self.feature_dims = [None, None] + \
filters[1:num_stages - 1]
self.conv1 = torch.nn.Conv2d(
3, 64,
kernel_size=7,
stride=2,
padding=3,
bias=False,
)
self.bn1 = affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
kernel_size=2,
stride=2,
padding=0,
ceil_mode=True,
)
self.layer1 = self.make_blocks(filters[0], blocks[0])
self.layer2 = self.make_blocks(filters[1], blocks[1], 2)
if num_stages >= 4:
self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5:
self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
self.reset_parameters()
def reset_parameters(self):
# The Kaiming Initialization
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
# Fix the gain for [-127, 127]
a=1,
) # Xavier Initialization
def make_blocks(self, dim_out, blocks, stride=1):
downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out),
)
layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
self.dim_in = dim_out
for i in range(1, len(blocks)):
if blocks[i] == 'r':
layers.append(WideResBlock(dim_out, dim_out))
elif blocks[i] == 'i':
layers.append(InceptionBlock(dim_out, dim_out))
else:
raise ValueError('Unknown block flag: ' + blocks[i])
return torch.nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
outputs = [None, None, self.layer2(x)]
if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])]
return outputs
def airnet(num_stages):
blocks = (
('r', 'r'), # conv2
('r', 'i'), # conv3
('r', 'i'), # conv4
('r', 'i'), # conv5
)
return AirNet(blocks, num_stages)
def make_airnet_(): return airnet(5)
def make_airnet_3b(): return airnet(3)
def make_airnet_4b(): return airnet(4)
def make_airnet_5b(): return airnet(5)
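# How the stage spec decodes (editorial sketch): 'r' -> WideResBlock,
# 'i' -> InceptionBlock; the first block of each stage is always a
# WideResBlock, and num_stages=N keeps stages layer1..layer(N-1).
_blocks = (('r', 'r'), ('r', 'i'), ('r', 'i'), ('r', 'i'))
for _stage, _spec in enumerate(_blocks[:3], start=1):   # airnet(4)
    _kinds = ['WideResBlock' if _f == 'r' else 'InceptionBlock'
              for _f in _spec[1:]]
    print('layer{}: WideResBlock -> {}'.format(_stage, ' -> '.join(_kinds)))
# layer1: WideResBlock -> WideResBlock
# layer2: WideResBlock -> InceptionBlock
# layer3: WideResBlock -> InceptionBlock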
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def conv1x1(dim_in, dim_out, stride=1, bias=False):
"""1x1 convolution."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
def conv3x3(dim_in, dim_out, stride=1, bias=False):
"""3x3 convolution with padding."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=3,
stride=stride,
padding=1,
bias=bias,
)
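# Typical composition of these helpers (editorial sketch; runnable only with
# the dragon torch shim installed): the projection shortcut of a residual
# block, i.e. a strided 1x1 conv followed by the frozen affine.
import dragon.vm.torch as torch

_downsample = torch.nn.Sequential(
    conv1x1(64, 128, stride=2),   # halve resolution, widen channels
    affine(128),                  # frozen per-channel scale/shift
)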
......@@ -35,11 +35,13 @@ class Detector(torch.nn.Module):
``lib.core.config`` for their hyper-parameters.
"""
def __init__(self):
super(Detector, self).__init__()
model = cfg.MODEL.TYPE
backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:]
self.recorder = None
# + Data Loader
self.data_layer = importlib.import_module(
......@@ -92,9 +94,14 @@ class Detector(torch.nn.Module):
Parameters
----------
inputs : dict or None
inputs : dict, optional
The inputs.
Returns
-------
dict
The outputs.
"""
# 0. Get the inputs
if inputs is None:
......@@ -161,7 +168,6 @@ class Detector(torch.nn.Module):
"""Optimize the graph for the inference.
It usually involves the removing of BN or Affine.
"""
##################################
# Merge Affine into Convolution #
......
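# The "Merge Affine into Convolution" pass boils down to weight algebra: for
# y = alpha * conv(x) + beta, scale each output filter by its alpha and absorb
# beta into the bias. An editorial numpy sketch of that rewrite (illustrative,
# not the repo's actual implementation):
import numpy as np

def _fuse_conv_affine(w, b, alpha, beta):
    # w: (C_out, C_in, kH, kW); b: (C_out,) or None
    w_fused = w * alpha.reshape(-1, 1, 1, 1)
    b_fused = (b if b is not None else 0.) * alpha + beta
    return w_fused, b_fused

_w = np.random.randn(8, 3, 3, 3).astype(np.float32)
_a, _b = np.random.rand(8), np.random.rand(8)
_wf, _bf = _fuse_conv_affine(_w, None, _a, _b)
assert _wf.shape == _w.shape and _bf.shape == (8,)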
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import importlib
_STORE = collections.defaultdict(dict)
###########################################
# #
# Body #
# #
###########################################
# ResNet
for D in [18, 34, 50, 101, 152, 200, 269]:
_STORE['BODY']['resnet{}'.format(D)] = \
'lib.modeling.resnet.make_resnet_{}'.format(D)
# VGG
for D in [16, 19]:
for T in ['', '_reduced_300', '_reduced_512']:
_STORE['BODY']['vgg{}{}'.format(D, T)] = \
'lib.modeling.vgg.make_vgg_{}{}'.format(D, T)
# AirNet
for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
            'The {} `{}` was not registered.\n'
            'Registered modules: [{}]'.format(
                desc, name, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
    except ImportError:
        raise ValueError('Cannot import module from: ' + module_name)
def get_body_func(name):
return get_template_func(
name, _STORE['BODY'], 'Body')
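# Editorial usage sketch: backbones are resolved by registry name at run
# time (the module path lib.modeling.factory is assumed here):
from lib.modeling.factory import get_body_func

_body_fn = get_body_func('resnet50')   # -> lib.modeling.resnet.make_resnet_50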
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.ops.modules import RPNDecoder
class FastRCNN(torch.nn.Module):
"""Generate proposal regions for R-CNN series.
The pipeline is as follows:
... -> RoIs \ /-> cls_score -> cls_loss
-> RoIFeatureXform -> MLP
... -> Features / \-> bbox_pred -> bbox_loss
"""
def __init__(self, dim_in=256):
super(FastRCNN, self).__init__()
if len(cfg.RPN.STRIDES) > 1:
            # RPN with multiple strides (i.e., FPN)
from lib.fpn import ProposalLayer, ProposalTargetLayer
else:
            # RPN with single stride (i.e., C4)
from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.rpn_decoder = RPNDecoder()
self.proposal_layer = ProposalLayer()
self.proposal_target_layer = ProposalTargetLayer()
self.softmax = torch.nn.Softmax(dim=1)
self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = {
'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1., reduction='batch_size')
# Compute spatial scales for multiple strides
roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels]
self.reset_parameters()
def reset_parameters(self):
# Careful initialization for Fast R-CNN
torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters():
if 'bias' in name:
torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func(
feature, rois,
output_size=(
cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION,
),
spatial_scale=spatial_scale,
)
def forward(self, **kwargs):
# Generate Proposals
# Apply the CXX implementation during inference
proposal_func = self.proposal_layer \
if self.training else self.rpn_decoder
self.rcnn_data = {
'rois': proposal_func(
kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data),
kwargs['rpn_bbox_pred'],
kwargs['ims_info'],
)
}
# Generate Targets from Proposals
if self.training:
self.rcnn_data.update(
self.proposal_target_layer(
rpn_rois=self.rcnn_data['rois'],
gt_boxes=kwargs['gt_boxes'],
)
)
# Transform RoI Feature
roi_features = []
if len(self.rcnn_data['rois']) > 1:
for i, spatial_scale in enumerate(self.spatial_scales):
roi_features.append(
self.RoIFeatureTransform(
kwargs['features'][i],
self.rcnn_data['rois'][i],
spatial_scale,
)
)
roi_features = torch.cat(roi_features, dim=0)
else:
spatial_scale = 1.0 / cfg.RPN.STRIDES[0]
roi_features = \
self.RoIFeatureTransform(
kwargs['features'][0],
self.rcnn_data['rois'][0],
spatial_scale,
)
# Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim)
rcnn_output = self.relu(self.fc6(roi_features))
rcnn_output = self.relu(self.fc7(rcnn_output))
# Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float()
outputs = collections.OrderedDict({
'bbox_pred':
self.bbox_pred(rcnn_output).float(),
})
if self.training:
# Compute rcnn losses
outputs.update(collections.OrderedDict({
'cls_loss': self.cls_loss(
cls_score,
self.rcnn_data['labels'],
),
'bbox_loss': self.bbox_loss(
outputs['bbox_pred'],
self.rcnn_data['bbox_targets'],
self.rcnn_data['bbox_inside_weights'],
self.rcnn_data['bbox_outside_weights'],
),
}))
else:
# Return the rois to decode the refine boxes
if len(self.rcnn_data['rois']) > 1:
outputs['rois'] = torch.cat(
self.rcnn_data['rois'], dim=0)
else:
outputs['rois'] = self.rcnn_data['rois'][0]
# Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score)
return outputs
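# Two bits of arithmetic from the head above (editorial check): fc6 consumes
# dim_in * R^2 features (256 * 7 * 7 with the configs in this commit), and
# FPN level k has spatial scale 1 / 2^k (RoI levels 2..5 assumed as typical).
assert 256 * 7 ** 2 == 12544                       # self.roi_head_dim
assert [1.0 / 2 ** lvl for lvl in range(2, 6)] == \
    [0.25, 0.125, 0.0625, 0.03125]                 # self.spatial_scales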
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.ops.modules import RPNDecoder
class FastRCNN(torch.nn.Module):
"""Generate proposal regions for R-CNN series.
The pipeline is as follows:
... -> RoIs \ /-> cls_score -> cls_loss
-> RoIFeatureXform -> MLP
... -> Features / \-> bbox_pred -> bbox_loss
"""
def __init__(self, dim_in=256):
super(FastRCNN, self).__init__()
if len(cfg.RPN.STRIDES) > 1:
            # RPN with multiple strides (i.e., FPN)
from lib.fpn import ProposalLayer, ProposalTargetLayer
else:
            # RPN with single stride (i.e., C4)
from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.rpn_decoder = RPNDecoder()
self.proposal_layer = ProposalLayer()
self.proposal_target_layer = ProposalTargetLayer()
self.softmax = torch.nn.Softmax(dim=1)
self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = {
'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size')
# Compute spatial scales for multiple strides
roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels]
self.reset_parameters()
def reset_parameters(self):
# Careful initialization for Fast R-CNN
torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters():
if 'bias' in name:
torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func(
feature, rois,
output_size=(
cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION,
),
spatial_scale=spatial_scale,
)
def forward(self, **kwargs):
# Generate Proposals
# Apply the CXX implementation during inference
proposal_func = self.proposal_layer \
if self.training else self.rpn_decoder
self.rcnn_data = {
'rois': proposal_func(
kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data),
kwargs['rpn_bbox_pred'],
kwargs['ims_info'],
)
}
# Generate Targets from Proposals
if self.training:
self.rcnn_data.update(
self.proposal_target_layer(
rpn_rois=self.rcnn_data['rois'],
gt_boxes=kwargs['gt_boxes'],
)
)
# Transform RoI Feature
roi_features = []
if len(self.rcnn_data['rois']) > 1:
for i, spatial_scale in enumerate(self.spatial_scales):
roi_features.append(
self.RoIFeatureTransform(
kwargs['features'][i],
self.rcnn_data['rois'][i],
spatial_scale,
)
)
roi_features = torch.cat(roi_features, dim=0)
else:
spatial_scale = 1.0 / cfg.RPN.STRIDES[0]
roi_features = \
self.RoIFeatureTransform(
kwargs['features'][0],
self.rcnn_data['rois'][0],
spatial_scale,
)
# Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim)
rcnn_output = self.relu(self.fc6(roi_features))
rcnn_output = self.relu(self.fc7(rcnn_output))
# Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float()
outputs = collections.OrderedDict([
('bbox_pred', self.bbox_pred(rcnn_output).float()),
])
if self.training:
# Compute rcnn losses
outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.rcnn_data['labels'])),
('bbox_loss', self.bbox_loss(
outputs['bbox_pred'],
self.rcnn_data['bbox_targets'],
self.rcnn_data['bbox_inside_weights'],
self.rcnn_data['bbox_outside_weights'],
)),
]))
else:
# Return the rois to decode the refine boxes
if len(self.rcnn_data['rois']) > 1:
outputs['rois'] = torch.cat(
self.rcnn_data['rois'], dim=0)
else:
outputs['rois'] = self.rcnn_data['rois'][0]
# Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score)
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1
from lib.modeling import conv3x3
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
class FPN(torch.nn.Module):
"""Feature Pyramid Networks for R-CNN and RetinaNet."""
def __init__(self, feature_dims):
super(FPN, self).__init__()
self.C = torch.nn.ModuleList()
self.P = torch.nn.ModuleList()
self.apply_func = self.apply_on_rcnn
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True))
self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True))
if 'retinanet' in cfg.MODEL.TYPE:
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM
self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True))
self.apply_func = self.apply_on_retinanet
self.relu = torch.nn.ReLU(inplace=False)
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
self.reset_parameters()
self.feature_dims = [cfg.FPN.DIM]
def reset_parameters(self):
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
a=1, # Fix the gain for [-127, 127]
) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0)
def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Apply MaxPool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1]))
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
def apply_on_retinanet(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
        outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Add extra convolutions for higher features
extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl:
extra_input = self.relu(outputs[-1])
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
def forward(self, features):
return self.apply_func(features)
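# The top-down merge above is "resize the coarser map to the lateral's size,
# then add in place". An editorial numpy sketch of one step, with a 2x
# nearest-neighbor repeat standing in for torch.vision.ops.nn_resize
# (shapes are illustrative):
import numpy as np

_top = np.random.rand(1, 256, 16, 16).astype(np.float32)   # coarser level
_lat = np.random.rand(1, 256, 32, 32).astype(np.float32)   # lateral 1x1 output
_up = _top.repeat(2, axis=2).repeat(2, axis=3)             # nearest 2x resize
assert (_lat + _up).shape == _lat.shape                    # next fpn_input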
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class BasicBlock(torch.nn.Module):
def __init__(
self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out)
self.downsample = downsample
self.dropblock = dropblock
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
out = self.conv2(out)
out = self.bn2(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.downsample is not None:
residual = self.downsample(residual)
out += residual
out = self.relu(out)
return out
class Bottleneck(torch.nn.Module):
# 1x64d => 0.25 (ResNet)
# 32x8d, 64x4d => 1.0 (ResNeXt)
contraction = cfg.RESNET.NUM_GROUPS \
* cfg.RESNET.GROUP_WIDTH / 256.0
def __init__(
self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction)
self.conv1 = conv1x1(dim_in, dim)
self.bn1 = affine(dim)
self.conv2 = conv3x3(dim, dim, stride=stride)
self.bn2 = affine(dim)
self.conv3 = conv1x1(dim, dim_out)
self.bn3 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.downsample = downsample
self.dropblock = dropblock
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
out = self.conv3(out)
out = self.bn3(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.downsample is not None:
residual = self.downsample(residual)
out += residual
out = self.relu(out)
return out
class ResNet(torch.nn.Module):
def __init__(self, block, layers, filters):
super(ResNet, self).__init__()
self.dim_in, filters = filters[0], filters[1:]
self.feature_dims = [self.dim_in] + filters
self.conv1 = torch.nn.Conv2d(
3, 64,
kernel_size=7,
stride=2,
padding=3,
bias=False,
)
self.bn1 = affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
kernel_size=3,
stride=2,
padding=0,
ceil_mode=True,
)
self.drop3 = torch.nn.DropBlock2d(
kp=0.9,
block_size=7,
alpha=0.25,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.drop4 = torch.nn.DropBlock2d(
kp=0.9,
block_size=7,
alpha=1.00,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4)
self.reset_parameters()
def reset_parameters(self):
# The Kaiming Initialization
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_(
m.weight,
nonlinearity='relu',
)
# Stop the gradients if necessary
def freeze_func(m):
if isinstance(m, torch.nn.Conv2d):
m.weight.requires_grad = False
m._buffers['weight'] = m.weight
del m._parameters['weight']
if cfg.MODEL.FREEZE_AT > 0:
self.conv1.apply(freeze_func)
for i in range(cfg.MODEL.FREEZE_AT, 1, -1):
getattr(self, 'layer{}'.format(i - 1)).apply(freeze_func)
def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None):
downsample = None
if stride != 1 or self.dim_in != dim_out:
downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out),
)
layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)]
self.dim_in = dim_out
for i in range(1, blocks):
layers.append(block(dim_out, dim_out, dropblock=dropblock))
return torch.nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
outputs = [x]
outputs += [self.layer1(outputs[-1])]
outputs += [self.layer2(outputs[-1])]
outputs += [self.layer3(outputs[-1])]
outputs += [self.layer4(outputs[-1])]
return outputs
def resnet(depth):
if depth == 18:
units = [2, 2, 2, 2]
elif depth == 34:
units = [3, 4, 6, 3]
elif depth == 50:
units = [3, 4, 6, 3]
elif depth == 101:
units = [3, 4, 23, 3]
elif depth == 152:
units = [3, 8, 36, 3]
elif depth == 200:
units = [3, 24, 36, 3]
elif depth == 269:
units = [3, 30, 48, 8]
else:
raise ValueError('Unsupported depth: %d' % depth)
block = Bottleneck if depth >= 50 else BasicBlock
filters = [64, 256, 512, 1024, 2048] \
if depth >= 50 else [64, 64, 128, 256, 512]
return ResNet(block, units, filters)
def make_resnet_18(): return resnet(18)
def make_resnet_34(): return resnet(34)
def make_resnet_50(): return resnet(50)
def make_resnet_101(): return resnet(101)
def make_resnet_152(): return resnet(152)
# Depths 200 and 269 are supported by resnet() and registered in the factory.
def make_resnet_200(): return resnet(200)
def make_resnet_269(): return resnet(269)
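# The Bottleneck contraction in numbers (editorial check): with the ResNet
# defaults NUM_GROUPS=1, GROUP_WIDTH=64 the factor is 64/256 = 0.25, so a
# 256-channel stage squeezes to 64 internal channels; 32x8d ResNeXt gives 1.0.
assert int(256 * (1 * 64 / 256.0)) == 64     # ResNet, 1x64d
assert int(256 * (32 * 8 / 256.0)) == 256    # ResNeXt, 32x8d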
......@@ -59,8 +59,7 @@ class RetinaNet(torch.nn.Module):
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
)
self.bbox_loss = torch.nn.SmoothL1Loss(
beta=1. / 9., reduction='batch_size',
)
beta=.11, reduction='batch_size')
self.reset_parameters()
def reset_parameters(self):
......@@ -133,26 +132,22 @@ class RetinaNet(torch.nn.Module):
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return collections.OrderedDict({
'cls_loss':
self.cls_loss(
cls_score,
self.retinanet_data['labels'],
),
'bbox_loss':
self.bbox_loss(
bbox_pred,
self.retinanet_data['bbox_targets'],
self.retinanet_data['bbox_inside_weights'],
self.retinanet_data['bbox_outside_weights'],
)
})
return collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.retinanet_data['labels'])),
('bbox_loss', self.bbox_loss(
bbox_pred,
self.retinanet_data['bbox_targets'],
self.retinanet_data['bbox_inside_weights'],
self.retinanet_data['bbox_outside_weights'],
)),
])
def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict({'bbox_pred': bbox_pred})
outputs = collections.OrderedDict([('bbox_pred', bbox_pred)])
if self.training:
outputs.update(
......
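# For reference (editorial sketch of the standard formula, not dragon's
# kernel): smooth L1 with threshold beta is 0.5*x^2/beta for |x| < beta and
# |x| - 0.5*beta otherwise, so moving beta from 1/9 (~0.111) to 0.11 only
# nudges the quadratic region slightly.
import numpy as np

def _smooth_l1(x, beta):
    x = np.abs(x)
    return np.where(x < beta, 0.5 * x * x / beta, x - 0.5 * beta)

print(_smooth_l1(np.array([0.05, 0.11, 1.0]), 1. / 9.))
print(_smooth_l1(np.array([0.05, 0.11, 1.0]), 0.11))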
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class RPN(torch.nn.Module):
"""Region Proposal Networks for R-CNN series."""
def __init__(self, dim_in=256):
super(RPN, self).__init__()
##################################
# RPN outputs #
##################################
num_anchors = len(cfg.RPN.ASPECT_RATIOS) * (
len(cfg.RPN.SCALES) if len(cfg.RPN.STRIDES) == 1 else 1)
self.output = conv3x3(dim_in, dim_in, bias=True)
self.cls_score = conv1x1(dim_in, num_anchors, bias=True)
self.bbox_pred = conv1x1(dim_in, num_anchors * 4, bias=True)
self.relu = torch.nn.ReLU(inplace=True)
##################################
# RPN losses #
##################################
if len(cfg.RPN.STRIDES) > 1:
            # RPN with multiple strides (i.e., FPN)
from lib.fpn.layers.anchor_target_layer import AnchorTargetLayer
else:
            # RPN with single stride (i.e., C4)
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer
self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.BCEWithLogitsLoss()
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1. / 9.)
self.reset_parameters()
def reset_parameters(self):
# Initialization for the RPN
# Weight ~ Normal(0, 0.01)
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.01)
def compute_outputs(self, features):
"""Compute the RPN logits.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
"""
# Compute rpn logits
cls_score_wide, bbox_pred_wide = [], []
for feature in features:
x = self.relu(self.output(feature))
if len(features) > 1:
cls_score = self.cls_score(x).view(0, -1)
bbox_pred = self.bbox_pred(x).view(0, 4, -1)
else:
cls_score = self.cls_score(x)
bbox_pred = self.bbox_pred(x)
cls_score_wide.append(cls_score)
bbox_pred_wide.append(bbox_pred)
if len(features) > 1:
# Concat them if necessary
return torch.cat(cls_score_wide, dim=1), \
torch.cat(bbox_pred_wide, dim=2)
else:
return cls_score_wide[0], bbox_pred_wide[0]
def compute_losses(
self,
features,
cls_score,
bbox_pred,
gt_boxes,
ims_info,
):
"""Compute the RPN classification loss and regression loss.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
cls_score : dragon.vm.torch.Tensor
The (binary) classification logits.
bbox_pred : dragon.vm.torch.Tensor
The bbox regression logits.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
self.rpn_data = \
self.anchor_target_layer(
features=features,
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return collections.OrderedDict({
'rpn_cls_loss':
self.cls_loss(cls_score, self.rpn_data['labels']),
'rpn_bbox_loss':
self.bbox_loss(
bbox_pred,
self.rpn_data['bbox_targets'],
self.rpn_data['bbox_inside_weights'],
self.rpn_data['bbox_outside_weights'],
)
})
def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict({
'rpn_cls_score': cls_score,
'rpn_bbox_pred': bbox_pred,
})
if self.training:
outputs.update(
self.compute_losses(
kwargs['features'],
cls_score,
bbox_pred,
kwargs['gt_boxes'],
kwargs['ims_info'],
)
)
return outputs
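# Anchor-count arithmetic from __init__ above (editorial check; the ratio
# and scale lists are illustrative defaults, not read from this commit):
_ratios, _scales = [0.5, 1.0, 2.0], [32, 64, 128, 256, 512]
_n = lambda strides: len(_ratios) * (len(_scales) if strides == 1 else 1)
assert _n(1) == 15   # single-stride (C4) RPN: ratios x scales per location
assert _n(5) == 3    # FPN: one scale per level, so ratios only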
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class RPN(torch.nn.Module):
"""Region Proposal Networks for R-CNN series."""
def __init__(self, dim_in=256):
super(RPN, self).__init__()
##################################
# RPN outputs #
##################################
num_anchors = len(cfg.RPN.ASPECT_RATIOS) * (
len(cfg.RPN.SCALES) if len(cfg.RPN.STRIDES) == 1 else 1)
self.output = conv3x3(dim_in, dim_in, bias=True)
self.cls_score = conv1x1(dim_in, num_anchors, bias=True)
self.bbox_pred = conv1x1(dim_in, num_anchors * 4, bias=True)
self.relu = torch.nn.ReLU(inplace=True)
##################################
# RPN losses #
##################################
if len(cfg.RPN.STRIDES) > 1:
# RPN with multiple strides (i.e., FPN)
from lib.fpn.anchor_target_layer import AnchorTargetLayer
else:
# RPN with a single stride (i.e., C4)
from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer
self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.BCEWithLogitsLoss()
self.bbox_loss = torch.nn.SmoothL1Loss(
beta=.11, reduction='batch_size')
self.reset_parameters()
def reset_parameters(self):
# Initialization for the RPN
# Weight ~ Normal(0, 0.01)
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.01)
def compute_outputs(self, features):
"""Compute the RPN logits.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
"""
# Compute rpn logits
cls_score_wide, bbox_pred_wide = [], []
for feature in features:
x = self.relu(self.output(feature))
if len(features) > 1:
cls_score = self.cls_score(x).view(0, -1)
bbox_pred = self.bbox_pred(x).view(0, 4, -1)
else:
cls_score = self.cls_score(x)
bbox_pred = self.bbox_pred(x)
cls_score_wide.append(cls_score)
bbox_pred_wide.append(bbox_pred)
if len(features) > 1:
# Concat them if necessary
return torch.cat(cls_score_wide, dim=1), \
torch.cat(bbox_pred_wide, dim=2)
else:
return cls_score_wide[0], bbox_pred_wide[0]
def compute_losses(
self,
features,
cls_score,
bbox_pred,
gt_boxes,
ims_info,
):
"""Compute the RPN classification loss and regression loss.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
cls_score : dragon.vm.torch.Tensor
The (binary) classification logits.
bbox_pred : dragon.vm.torch.Tensor
The bbox regression logits.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
self.rpn_data = \
self.anchor_target_layer(
features=features,
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return collections.OrderedDict([
('rpn_cls_loss', self.cls_loss(
cls_score, self.rpn_data['labels'])),
('rpn_bbox_loss', self.bbox_loss(
bbox_pred,
self.rpn_data['bbox_targets'],
self.rpn_data['bbox_inside_weights'],
self.rpn_data['bbox_outside_weights'],
)),
])
def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict([
('rpn_cls_score', cls_score),
('rpn_bbox_pred', bbox_pred),
])
if self.training:
outputs.update(
self.compute_losses(
kwargs['features'],
cls_score,
bbox_pred,
kwargs['gt_boxes'],
kwargs['ims_info'],
)
)
return outputs
......@@ -136,32 +136,29 @@ class SSD(torch.nn.Module):
gt_boxes=gt_boxes,
)
)
return collections.OrderedDict({
return collections.OrderedDict([
# A compensating factor of 4.0 is used,
# as we normalize both the pos and neg samples
'cls_loss':
self.cls_loss(
cls_score.view(-1, cfg.MODEL.NUM_CLASSES),
self.ssd_data['labels']
) * 4.,
'bbox_loss':
self.bbox_loss(
bbox_pred,
self.ssd_data['bbox_targets'],
self.ssd_data['bbox_inside_weights'],
self.ssd_data['bbox_outside_weights'],
)
})
('cls_loss', self.cls_loss(
cls_score.view(-1, cfg.MODEL.NUM_CLASSES),
self.ssd_data['labels']) * 4.),
('bbox_loss', self.bbox_loss(
bbox_pred,
self.ssd_data['bbox_targets'],
self.ssd_data['bbox_inside_weights'],
self.ssd_data['bbox_outside_weights'],
)),
])
def forward(self, *args, **kwargs):
prior_boxes = self.prior_box_layer(kwargs['features'])
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict({
'prior_boxes': prior_boxes,
'bbox_pred': bbox_pred,
})
outputs = collections.OrderedDict([
('bbox_pred', bbox_pred),
('prior_boxes', prior_boxes),
])
if self.training:
outputs.update(
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/nms_wrapper.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.core.config import cfg
from lib.utils import logger
try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms
except ImportError as e:
print('Failed to import cpu nms. Error: {0}'.format(str(e)))
try:
from lib.nms.gpu_nms import gpu_nms
except ImportError as e:
print('Failed to import gpu nms. Error: {0}'.format(str(e)))
def nms(detections, thresh, force_cpu=False):
"""Perform either CPU or GPU Hard-NMS."""
if detections.shape[0] == 0:
return []
if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else:
return cpu_nms(detections, thresh)
def soft_nms(
detections,
thresh,
method='linear',
sigma=0.5,
score_thresh=0.001,
):
"""Perform CPU Soft-NMS."""
if detections.shape[0] == 0:
return []
methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods:
logger.fatal('Unknown soft nms method: {}'.format(method))
return cpu_soft_nms(
detections,
thresh,
methods[method],
sigma,
score_thresh,
)
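A minimal usage sketch (hedged; it assumes detections is a float32 array of shape (N, 5) laid out as [x1, y1, x2, y2, score], which is how the test code below builds cls_detections):
import numpy as np
from lib.nms.nms_wrapper import nms, soft_nms
dets = np.array([
    [10, 10, 50, 50, 0.9],
    [12, 12, 52, 52, 0.8],   # heavily overlaps the first box
    [80, 80, 120, 120, 0.7],
], dtype=np.float32)
keep = nms(dets, thresh=0.5, force_cpu=True)  # indices of the kept boxes
keep_soft = soft_nms(dets, thresh=0.5, method='linear')  # indices again, after rescoring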
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
syntax = "proto2";
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
optional int32 width = 3;
optional bytes data = 4;
optional int32 label = 5;
repeated float float_data = 6;
optional bool encoded = 7 [default = false];
repeated int32 labels = 8;
}
message Annotation {
optional float x1 = 1;
optional float y1 = 2;
optional float x2 = 3;
optional float y2 = 4;
optional string name = 5;
optional bool difficult = 6 [default = false];
optional string mask = 7;
}
message AnnotatedDatum {
optional Datum datum = 1;
optional string filename = 2;
repeated Annotation annotation = 3;
}
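For reference, a hedged sketch of building and serializing one record with the generated bindings (assuming protoc has produced lib/proto/anno_pb2.py, as done by the build script; the image file name and box values are hypothetical):
from lib.proto import anno_pb2 as pb
with open('000001.jpg', 'rb') as f:
    datum = pb.Datum(channels=3, height=375, width=500,
                     data=f.read(), encoded=True)
box = pb.Annotation(x1=48., y1=240., x2=195., y2=371.,
                    name='dog', difficult=False)
example = pb.AnnotatedDatum(datum=datum, filename='000001.jpg',
                            annotation=[box])
serialized = example.SerializeToString()  # bytes stored in a record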
......@@ -13,5 +13,5 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.retinanet.layers.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.data_layer import DataLayer
from lib.retinanet.anchor_target_layer import AnchorTargetLayer
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = []
for stride in self.strides:
sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.base_anchors.append(
generate_anchors_v2(
stride=stride,
ratios=self.ratios,
sizes=sizes,
))
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors)
total_anchors += anchors.shape[0]
all_anchors = np.concatenate(all_anchors, axis=0)
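# Shape walk-through (a hedged example): with 3 aspect ratios and
# 3 scales per octave, A = 9 base anchors per level; a 100 x 152
# feature map gives K = 15200 cells, so that level alone contributes
# A * K = 136800 rows to all_anchors.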
# label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
anchors = all_anchors
inds_inside = np.arange(all_anchors.shape[0])
num_inside = len(inds_inside)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
# fg label: for each gt, anchor with highest overlap
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
gt_inds = argmax_overlaps[gt_argmax_overlaps]
labels[gt_argmax_overlaps] = gt_boxes[gt_inds, 4]
# fg label: above threshold IOU
inds = max_overlaps >= cfg.RETINANET.POSITIVE_OVERLAP
gt_inds = argmax_overlaps[inds]
labels[inds] = gt_boxes[gt_inds, 4]
fg_inds = np.where(labels > 0)[0]
# bg label: below threshold IOU
labels[max_overlaps < cfg.RETINANET.NEGATIVE_OVERLAP] = 0
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4])
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1)
labels_wide[ix, inds_inside] = labels
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.image import scale_image
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32,
)
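# Each ims_info row is [blob_height, blob_width, im_scale] for one
# scale (assuming the NHWC blob layout produced by im_list_to_blob).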
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Unpack results
return tensor_to_blob(outputs['detections'])[:, 1:]
def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_images[0])
num_scales = len(ims_scale)
ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext
ims_scale += ims_scale_ext
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Unpack results
results = tensor_to_blob(outputs['detections'])
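# Rows of results are [im_idx, x1, y1, x2, y2, score, cls]; the
# division i // num_scales below regroups the scales of each image.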
detections_wide = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:]
detections_wide[i // num_scales].append(detections)
for i in range(len(ims_shape)):
detections_wide[i] = np.vstack(detections_wide[i]) \
if len(detections_wide[i]) > 1 else detections_wide[i][0]
return detections_wide
def test_net(net, server):
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue
image_id, raw_image = server.get_image()
image_ids.append(image_id)
raw_images.append(raw_image)
# Run detection on specific scales
_t['im_detect'].tic()
if cfg.TEST.IMS_PER_BATCH > 1:
results = ims_detect(net, raw_images)
else:
results = [im_detect(net, raw_images[0])]
_t['im_detect'].toc()
# Post-Processing
_t['misc'].tic()
for item_idx, detections in enumerate(results):
i = batch_idx + item_idx
boxes_this_image = [[]]
# {x1, y1, x2, y2, score, cls}
detections = np.array(detections)
for j in range(1, num_classes):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4]
cls_scores = detections[cls_indices, 4]
cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
cls_detections,
cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(
cls_detections,
cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_images[item_idx],
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections')
server.evaluate_detections(all_boxes)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.image import scale_image
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Unpack results
return tensor_to_blob(outputs['detections'])[:, 1:]
def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_images[0])
num_scales = len(ims_scale)
ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext
ims_scale += ims_scale_ext
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Unpack results
results = tensor_to_blob(outputs['detections'])
detections_wide = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:]
detections_wide[i // num_scales].append(detections)
for i in range(len(ims_shape)):
detections_wide[i] = np.vstack(detections_wide[i]) \
if len(detections_wide[i]) > 1 else detections_wide[i][0]
return detections_wide
def test_net(net, server):
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue
image_id, raw_image = server.get_image()
image_ids.append(image_id)
raw_images.append(raw_image)
# Run detection on specific scales
_t['im_detect'].tic()
if cfg.TEST.IMS_PER_BATCH > 1:
results = ims_detect(net, raw_images)
else:
results = [im_detect(net, raw_images[0])]
_t['im_detect'].toc()
# Post-Processing
_t['misc'].tic()
for item_idx, detections in enumerate(results):
i = batch_idx + item_idx
boxes_this_image = [[]]
# {x1, y1, x2, y2, score, cls}
detections = np.array(detections)
for j in range(1, num_classes):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4]
cls_scores = detections[cls_indices, 4]
cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
cls_detections,
cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(
cls_detections,
cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_images[item_idx],
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images,
_t['im_detect'].average_time,
_t['misc'].average_time),
end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections')
server.evaluate_detections(all_boxes)
......@@ -13,8 +13,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.ssd.layers.data_layer import DataLayer
from lib.ssd.layers.hard_mining_layer import HardMiningLayer
from lib.ssd.layers.multibox_layer import MultiBoxMatchLayer
from lib.ssd.layers.multibox_layer import MultiBoxTargetLayer
from lib.ssd.layers.priorbox_layer import PriorBoxLayer
from lib.ssd.data_layer import DataLayer
from lib.ssd.hard_mining_layer import HardMiningLayer
from lib.ssd.multibox_layer import MultiBoxMatchLayer
from lib.ssd.multibox_layer import MultiBoxTargetLayer
from lib.ssd.priorbox_layer import PriorBoxLayer
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from lib.core.config import cfg
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self._img_blob_size = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
self.q_in = self.q_out = None
self.daemon = True
def get(self):
img_blob, boxes_blob = np.zeros(self._img_blob_size, 'uint8'), []
for i in range(cfg.TRAIN.IMS_PER_BATCH):
img_blob[i], gt_boxes = self.q_in.get()
# Pack the boxes by adding the index of images
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), np.float32)
boxes[:, :gt_boxes.shape[1]] = gt_boxes
boxes[:, -1] = i
boxes_blob.append(boxes)
return {
'data': img_blob,
'gt_boxes': np.concatenate(boxes_blob, 0),
}
def run(self):
while True:
self.q_out.put(self.get())
......@@ -13,54 +13,69 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Queue
import multiprocessing as mp
import time
import dragon
import pprint
import dragon.vm.torch as torch
import numpy as np
from lib.faster_rcnn.data.data_reader import DataReader
from lib.ssd.data.data_transformer import DataTransformer
from lib.ssd.data.blob_fetcher import BlobFetcher
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.ssd.data_transformer import DataTransformer
from lib.utils import logger
class DataBatch(object):
"""DataBatch aims to prefetch data by ``Triple-Buffering``.
class DataLayer(torch.nn.Module):
"""Generate a mini-batch of data."""
It takes full advantage of the Process/Thread of Python,
def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
which provides a remarkable I/O speed-up for scalable distributed training.
def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
"""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
source : str
The path of database.
dataset : lambda
The creator of a dataset.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=2048
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=128
batch_size : int, optional, default=32
The size of a mini-batch.
prefetch : int, optional, default=5
The prefetch count.
"""
super(DataBatch, self).__init__()
# Init mpi
global_rank, local_rank, group_size = 0, 0, 1
if dragon.mpi.is_init():
group = dragon.mpi.is_parallel()
if group is not None: # DataParallel
global_rank = dragon.mpi.rank()
group_size = len(group)
for i, node in enumerate(group):
if global_rank == node:
local_rank = i
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_default_process_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
......@@ -77,63 +92,50 @@ class DataBatch(object):
self._num_transformers = min(
self._num_transformers, self._max_transformers)
# Init queues
self.Q1 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q2 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q3 = Queue(self._prefetch * self._num_readers)
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q2 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
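# Data flow (sketch): DataReader -> Q1 (serialized examples)
# -> DataTransformer -> Q2 ((image, gt_boxes) pairs)
# -> DataBatch.run() -> Q3 (packed mini-batches)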
# Init readers
# Initialize readers
self._readers = []
for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += local_rank * self._num_readers
self._readers[i]._num_parts = num_parts
self._readers[i]._part_idx = part_idx
self._readers[i]._rng_seed += part_idx
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Init transformers
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q_out = self.Q2
transformer._rng_seed += (i + rank * self._num_transformers)
transformer.q_in, transformer.q_out = self.Q1, self.Q2
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Init blob fetchers
self._fetchers = []
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.q_in = self.Q2
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Avoid echoing from multiple nodes
if local_rank == 0:
self.echo()
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate(self._fetchers)
logger.info('Terminating BlobFetcher ......')
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminating DataTransformer ......')
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminating DataReader......')
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
......@@ -149,14 +151,24 @@ class DataBatch(object):
"""
return self.Q3.get()
def echo(self):
"""Print I/O Information."""
print('---------------------------------------------------------')
print('BatchFetcher({} Threads), Using config:'.format(
self._num_readers + self._num_transformers + self._num_fetchers))
params = {'queue_size': self._prefetch,
'n_readers': self._num_readers,
'n_transformers': self._num_transformers,
'n_fetchers': self._num_fetchers}
pprint.pprint(params)
print('---------------------------------------------------------')
def run(self):
"""Start the process to produce batches."""
image_batch_shape = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
while True:
boxes_to_pack = []
image_batch = np.zeros(image_batch_shape, 'uint8')
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
image_batch[image_index], gt_boxes = self.Q2.get()
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
boxes[:, :gt_boxes.shape[1]], boxes[:, -1] = gt_boxes, image_index
boxes_to_pack.append(boxes)
self.Q3.put({
'data': image_batch,
'gt_boxes': np.concatenate(boxes_to_pack),
})
......@@ -13,14 +13,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import multiprocessing
import cv2
import numpy as np
from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.ssd.data import transforms
from lib.utils import logger
from lib.ssd import transforms
from lib.utils.boxes import flip_boxes
class DataTransformer(multiprocessing.Process):
......@@ -41,38 +41,41 @@ class DataTransformer(multiprocessing.Process):
self.q_in = self.q_out = None
self.daemon = True
def make_roi_dict(self, ann_datum, flip=False):
annotations = ann_datum.annotation
def make_roi_dict(self, example, flip=False):
n_objects = 0
if not self._use_diff:
for ann in annotations:
if not ann.difficult: n_objects += 1
else: n_objects = len(annotations)
for obj in example['object']:
if obj.get('difficult', 0) == 0:
n_objects += 1
else:
n_objects = len(example['object'])
roi_dict = {
'width': ann_datum.datum.width,
'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32),
'boxes': np.zeros((n_objects, 4), dtype=np.float32),
'normalized_boxes': np.zeros((n_objects, 4), dtype=np.float32),
'width': example['width'],
'height': example['height'],
'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), 'float32'),
'normalized_boxes': np.zeros((n_objects, 4), 'float32'),
}
rec_idx = 0
for ann in annotations:
if not self._use_diff and ann.difficult:
# Filter the difficult instances
object_idx = 0
for obj in example['object']:
if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue
roi_dict['boxes'][rec_idx, :] = [
max(0, ann.x1),
max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1),
roi_dict['boxes'][object_idx, :] = [
max(0, obj['xmin']),
max(0, obj['ymin']),
min(obj['xmax'], example['width'] - 1),
min(obj['ymax'], example['height'] - 1),
]
roi_dict['gt_classes'][rec_idx] = \
self._class_to_ind[ann.name]
rec_idx += 1
roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']]
object_idx += 1
if flip:
roi_dict['boxes'] = _flip_boxes(
roi_dict['boxes'] = flip_boxes(
roi_dict['boxes'], roi_dict['width'])
roi_dict['boxes'][:, 0::2] /= roi_dict['width']
......@@ -80,26 +83,19 @@ class DataTransformer(multiprocessing.Process):
return roi_dict
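# A hedged sketch of the example dict consumed above (field names are
# inferred from the accessors; 'content' holds the encoded image bytes):
# example = {
#     'width': 500, 'height': 375, 'content': b'\xff\xd8...',
#     'object': [{'name': 'dog', 'xmin': 48, 'ymin': 240,
#                 'xmax': 195, 'ymax': 371, 'difficult': 0}],
# }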
def get(self, serialized):
ann_datum = pb.AnnotatedDatum()
ann_datum.ParseFromString(serialized)
img_datum = ann_datum.datum
img = np.fromstring(img_datum.data, np.uint8)
if img_datum.encoded is True:
img = cv2.imdecode(img, -1)
else:
h, w = img_datum.height, img_datum.width
img = img.reshape((h, w, img_datum.channels))
def get(self, example):
img = np.frombuffer(example['content'], np.uint8)
img = cv2.imdecode(img, -1)
# Flip
flip = False
if self._mirror:
if np.random.randint(0, 2) > 0:
if np.random.randint(2) > 0:
img = img[:, ::-1, :]
flip = True
# Datum -> RoIDB
roi_dict = self.make_roi_dict(ann_datum, flip)
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, flip)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
......@@ -120,19 +116,7 @@ class DataTransformer(multiprocessing.Process):
def run(self):
np.random.seed(self._rng_seed)
while True:
serialized = self.q_in.get()
im, gt_boxes = self.get(serialized)
if len(gt_boxes) < 1:
continue
self.q_out.put((im, gt_boxes))
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encountered invalid coordinates after flipping boxes.')
return flip_boxes
outputs = self.get(self.q_in.get())
if len(outputs[1]) < 1:
continue # Ignore images without objects
self.q_out.put(outputs)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def generate_anchors(min_sizes, max_sizes, ratios):
"""
Generate anchor (reference) windows by enumerating
aspect ratios, min_sizes, max_sizes wrt a reference ctr (x, y, w, h).
"""
total_anchors = []
for idx, min_size in enumerate(min_sizes):
# Note that SSD assumes it is a ctr-anchor
base_anchor = np.array([0, 0, min_size, min_size])
anchors = _ratio_enum(base_anchor, ratios)
if len(max_sizes) > 0:
max_size = max_sizes[idx]
_anchors = anchors[0].reshape((1, 4))
_anchors = np.vstack([_anchors, _max_size_enum(
base_anchor, min_size, max_size)])
anchors = np.vstack([_anchors, anchors[1:]])
total_anchors.append(anchors)
return np.vstack(total_anchors)
def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window)."""
w, h = anchor[2], anchor[3]
x_ctr, y_ctr = anchor[0], anchor[1]
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * ws,
y_ctr - 0.5 * hs,
x_ctr + 0.5 * ws,
y_ctr + 0.5 * hs))
return anchors
def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
hs = np.round(np.sqrt(size_ratios))
ws = np.round(hs * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _max_size_enum(base_anchor, min_size, max_size):
"""Enumerate a anchor for max_size wrt base_anchor."""
w, h, x_ctr, y_ctr = _whctrs(base_anchor)
ws = hs = np.sqrt([min_size * max_size])
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
if __name__ == '__main__':
print(generate_anchors(min_sizes=[30], max_sizes=[60], ratios=[1, 0.5, 2]))
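# The demo above prints, approximately:
# [[-15.     -15.      15.      15.    ]
#  [-21.2132 -21.2132  21.2132  21.2132]
#  [-10.5    -21.      10.5     21.    ]
#  [-21.     -10.5     21.      10.5   ]]
# i.e. the ratio-1 anchor, the sqrt(min_size * max_size) anchor, and
# the remaining aspect-ratio anchors, all centered at (0, 0).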
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
class HardMiningLayer(torch.nn.Module):
def __init__(self):
super(HardMiningLayer, self).__init__()
def forward(self, conf_prob, match_labels, max_overlaps):
# Confidence of each matched box
conf_prob_wide = conf_prob.numpy(True)
# Label of each matched box
match_labels_wide = match_labels
# Max overlaps between default boxes and gt boxes
max_overlaps_wide = max_overlaps
# label ``-1`` will be ignored
labels_wide = -np.ones(match_labels_wide.shape, dtype=np.int64)
for ix in range(match_labels_wide.shape[0]):
match_labels = match_labels_wide[ix]
max_overlaps = max_overlaps_wide[ix]
conf_prob = conf_prob_wide[ix]
conf_loss = np.zeros(match_labels.shape, dtype=np.float32)
inds = np.where(match_labels >= 0)[0]
flt_min = np.finfo(float).eps
# Softmax cross-entropy
conf_loss[inds] = -np.log(np.maximum(
conf_prob[inds, match_labels[inds]], flt_min))
# Filter negatives
fg_inds = np.where(match_labels > 0)[0]
neg_inds = np.where(match_labels == 0)[0]
neg_overlaps = max_overlaps[neg_inds]
eligible_neg_inds = np.where(neg_overlaps < cfg.SSD.OHEM.NEG_OVERLAP)[0]
sel_inds = neg_inds[eligible_neg_inds]
# Do Mining
sel_loss = conf_loss[sel_inds]
num_pos = len(fg_inds)
num_sel = min(int(num_pos * cfg.SSD.OHEM.NEG_POS_RATIO), len(sel_inds))
sorted_sel_inds = sel_inds[np.argsort(-sel_loss)]
bg_inds = sorted_sel_inds[:num_sel]
labels_wide[ix][fg_inds] = match_labels[fg_inds] # Keep fg indices
labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices
# Feed labels to compute cls loss
return {'labels': blob_to_tensor(labels_wide)}
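# A worked example (hedged): with 10 positives and NEG_POS_RATIO = 3,
# at most the 30 highest-loss eligible negatives become background;
# every other anchor keeps label -1 and is ignored by the cls loss.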
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.ssd.data.data_batch import DataBatch
class DataLayer(torch.nn.Module):
def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'source': database.source,
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': 2048, # Chunk-Wise Shuffle
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image
def get_images(ims):
target_h = cfg.SSD.RESIZE.HEIGHT
target_w = cfg.SSD.RESIZE.WIDTH
processed_ims, im_scales = [], []
for im in ims:
im_scales.append((float(target_h) / im.shape[0],
float(target_w) / im.shape[1]))
processed_ims.append(cv2.resize(im, (target_w, target_h)))
ims_blob = np.array(processed_ims, dtype=np.uint8)
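# Note: im_scales holds (h_scale, w_scale) per image; the caller
# divides decoded boxes back by these factors.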
return ims_blob, im_scales
def ims_detect(detector, ims):
"""Detect images, with the single scale."""
# Prepare blobs
data, im_scales = get_images(ims)
data = torch.from_numpy(data).cuda(cfg.GPU_ID)
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs={'data': data})
# Decode results
batch_boxes = []
scores = tensor_to_blob(outputs['cls_prob'])
prior_boxes = tensor_to_blob(outputs['prior_boxes'])
box_deltas = tensor_to_blob(outputs['bbox_pred'])
for i in range(box_deltas.shape[0]):
boxes = bbox_transform_inv(
boxes=prior_boxes,
deltas=box_deltas[i],
weights=cfg.BBOX_REG_WEIGHTS,
)
boxes[:, 0::2] /= im_scales[i][1]
boxes[:, 1::2] /= im_scales[i][0]
batch_boxes.append(clip_tiled_boxes(boxes, ims[i].shape))
return scores, batch_boxes
def test_net(net, server):
    """Test the detector over all images provided by the server."""
    # Load settings
    classes = server.classes
    num_images = server.num_images
    num_classes = server.num_classes
    # all_boxes[cls][image] accumulates the detections per (class, image) pair
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    _t = {'im_detect': Timer(), 'misc': Timer()}
    for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
        # Collect raw images and their ids
        image_ids, raw_images = [], []
        for item_idx in range(cfg.TEST.IMS_PER_BATCH):
            if batch_idx + item_idx >= num_images:
                continue
            image_id, raw_image = server.get_image()
            image_ids.append(image_id)
            raw_images.append(raw_image)
        _t['im_detect'].tic()
        batch_scores, batch_boxes = ims_detect(net, raw_images)
        _t['im_detect'].toc()
        _t['misc'].tic()
        for item_idx in range(len(batch_scores)):
            i = batch_idx + item_idx
            scores = batch_scores[item_idx]
            boxes = batch_boxes[item_idx]
            boxes_this_image = [[]]
            for j in range(1, num_classes):
                # Select the candidates above the score threshold
                inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds]
                # Keep the top-k scoring candidates before applying NMS
                pre_nms_inds = np.argsort(-cls_scores)[:cfg.TEST.NMS_TOP_K]
                cls_scores = cls_scores[pre_nms_inds]
                cls_boxes = cls_boxes[pre_nms_inds]
                cls_detections = np.hstack(
                    (cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                if cfg.TEST.USE_SOFT_NMS:
                    keep = soft_nms(
                        cls_detections,
                        cfg.TEST.NMS,
                        method=cfg.TEST.SOFT_NMS_METHOD,
                        sigma=cfg.TEST.SOFT_NMS_SIGMA,
                    )
                else:
                    keep = nms(
                        cls_detections,
                        cfg.TEST.NMS,
                        force_cpu=True,
                    )
                cls_detections = cls_detections[keep, :]
                all_boxes[j][i] = cls_detections
                boxes_this_image.append(cls_detections)
            if cfg.VIS or cfg.VIS_ON_FILE:
                vis_one_image(
                    raw_images[item_idx],
                    classes,
                    boxes_this_image,
                    thresh=cfg.VIS_TH,
                    box_alpha=1.0,
                    show_class=True,
                    filename=server.get_save_filename(image_ids[item_idx]),
                )
            # Limit to max_per_image detections *over all classes*
            if cfg.TEST.DETECTIONS_PER_IM > 0:
                image_scores = []
                for j in range(1, num_classes):
                    if len(all_boxes[j][i]) < 1:
                        continue
                    image_scores.append(all_boxes[j][i][:, -1])
                if len(image_scores) > 0:
                    image_scores = np.hstack(image_scores)
                    if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
                        image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
                        for j in range(1, num_classes):
                            keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                            all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()
        print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(min(batch_idx + cfg.TEST.IMS_PER_BATCH, num_images),
                      num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time), end='')
    print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
    print('Evaluating detections')
    server.evaluate_detections(all_boxes)
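# A minimal driver sketch; `make_test_server` is a hypothetical stand-in
# for however the dataset server is constructed elsewhere in this repo:
#
#   server = make_test_server(cfg.TEST.DATABASE)  # supplies images and ids
#   test_net(detector, server)                    # detects, then evaluates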
......@@ -19,7 +19,7 @@ sys.path.append('../../')
import cv2
import numpy as np
from lib.ssd.data import transforms
from lib.ssd import transforms
if __name__ == '__main__':
......
......@@ -201,6 +201,16 @@ def expand_boxes(boxes, scale):
    return boxes_exp
def flip_boxes(boxes, width):
    """Flip the boxes horizontally."""
    # Use a distinct name to avoid shadowing this function inside its body
    flipped_boxes = boxes.copy()
    old_x1 = boxes[:, 0].copy()
    old_x2 = boxes[:, 2].copy()
    flipped_boxes[:, 0] = width - old_x2 - 1
    flipped_boxes[:, 2] = width - old_x1 - 1
    return flipped_boxes
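# A worked example (sketch): flipping twice is the identity. With width = 10
# and a box [2, 3, 5, 7] in (x1, y1, x2, y2) order:
#   first flip  -> [10 - 5 - 1, 3, 10 - 2 - 1, 7] = [4, 3, 7, 7]
#   second flip -> [10 - 7 - 1, 3, 10 - 4 - 1, 7] = [2, 3, 5, 7]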
def filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
......
......@@ -62,22 +62,20 @@ if __name__ == '__main__':
    if checkpoint is not None:
        cfg.TRAIN.WEIGHTS = checkpoint
    # Setup MPI
    if cfg.NUM_GPUS != dragon.mpi.size():
    # Setup the distributed environment
    world_rank = dragon.distributed.get_rank()
    world_size = dragon.distributed.get_world_size()
    if cfg.NUM_GPUS != world_size:
        raise ValueError(
            'Excepted {} mpi nodes, but got {}.'
            .format(len(args.gpus), dragon.mpi.size())
            'Expected to start {} processes, got {}.'
            .format(cfg.NUM_GPUS, world_size)
        )
    GPUs = [i for i in range(cfg.NUM_GPUS)]
    cfg.GPU_ID = GPUs[dragon.mpi.rank()]
    dragon.mpi.add_parallel_group([i for i in range(cfg.NUM_GPUS)])
    dragon.mpi.set_parallel_mode('NCCL' if cfg.USE_NCCL else 'MPI')
    logger.set_root_logger(world_rank == 0)
    # Setup logger
    if dragon.mpi.rank() != 0:
        logger.set_root_logger(False)
    # Select the GPU according to the rank of the process
    cfg.GPU_ID = [i for i in range(cfg.NUM_GPUS)][world_rank]
    # Fix the random seeds (numpy and dragon) for reproducibility
    # Fix the random seed for reproducibility
    numpy.random.seed(cfg.RNG_SEED)
    dragon.config.set_random_seed(cfg.RNG_SEED)
......@@ -89,7 +87,8 @@ if __name__ == '__main__':
    # Ready to train the network
    logger.info('Output will be saved to `{:s}`'
                .format(coordinator.checkpoints_dir()))
    train_net(coordinator, start_iter)
    # Finalize mpi
    dragon.mpi.finalize()
    with dragon.distributed.new_group(
            ranks=[i for i in range(cfg.NUM_GPUS)],
            backend='NCCL' if cfg.USE_NCCL else 'MPI',
            verbose=True).as_default():
        train_net(coordinator, start_iter)
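# A hypothetical launch example: the world-size check above expects one
# process per GPU, typically started by an MPI-style launcher
# (the script name is assumed):
#
#   mpirun -n 4 python train.py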
......@@ -82,7 +82,7 @@ if __name__ == '__main__':
    if checkpoint is not None:
        cfg.TRAIN.WEIGHTS = checkpoint
    # Fix the random seeds (numpy and dragon) for reproducibility
    # Fix the random seed for reproducibility
    numpy.random.seed(cfg.RNG_SEED)
    dragon.config.set_random_seed(cfg.RNG_SEED)
......