Commit f8359d17 by Ting PAN

Adapt to SeetaRecord

1 parent ca255ea0
Showing with 5417 additions and 6186 deletions
@@ -47,4 +47,4 @@ __pycache__
.idea
# OSX dir files
.DS_Store
\ No newline at end of file
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.2.0 (20190929)
Dragon Minimum Required (Version 0.3.0.dev20190929)
Changes:
Preview Features:
- Use SeetaRecord instead of LMDB.
- Flatten the implementation of layers.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.1.2 (20190723)
Dragon Minimum Required (Version 0.3.0.0)
......
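The headline change of this release, "Use SeetaRecord instead of LMDB", is visible in the TestServer diff near the end of this changeset: the per-detector `DataReader` over an LMDB source is replaced by `dragon.io.DataReader` over a `dragon.io.SeetaRecordDataset`. A minimal sketch of the new reading path, assuming a SeetaRecord database directory such as `/data/voc_0712_trainval` and that one serialized record is pulled per queue item:

```python
import multiprocessing as mp
import dragon

# Sketch only: the source is a SeetaRecord directory (no *_lmdb suffix),
# matching the DATABASE paths in the updated YAML configs below.
source = '/data/voc_0712_trainval'

# The dataset is passed as a factory so each reader process opens its own handle,
# mirroring the TestServer change in this commit.
reader = dragon.io.DataReader(
    dataset=lambda: dragon.io.SeetaRecordDataset(source))
reader.q_out = mp.Queue(8)    # buffered examples, as TestServer does with IMS_PER_BATCH
reader.start()

example = reader.q_out.get()  # assumption: one serialized record per queue item
```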
#!/bin/sh
# delete cache
rm -r build install *.c *.cpp
-# compile proto files
-protoc -I ../lib/proto --python_out=../lib/proto ../lib/proto/anno.proto
# compile cython modules
python setup.py build_ext --inplace
# compile cuda modules
-cd build
-cmake .. && make install && cd ..
+cd build && cmake .. && make install && cd ..
# setup
cp -r install/lib ../
@@ -32,15 +32,15 @@ FRCNN:
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-101.Affine.pth'
-  DATABASE: '/data/coco_2014_trainval35k_lmdb'
+  WEIGHTS: '/model/R-101.Affine.pth'
+  DATABASE: '/data/coco_2014_trainval35k'
  IMS_PER_BATCH: 2
  USE_DIFF: False  # Do not use crowd objects
  BATCH_SIZE: 512
  SCALES: [800]
  MAX_SIZE: 1333
TEST:
-  DATABASE: '/data/coco_2014_minival_lmdb'
+  DATABASE: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  RPN_POST_NMS_TOP_N: 1000
......
@@ -32,15 +32,15 @@ FRCNN:
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-101.Affine.pth'
-  DATABASE: '/data/coco_2014_trainval35k_lmdb'
+  WEIGHTS: '/model/R-101.Affine.pth'
+  DATABASE: '/data/coco_2014_trainval35k'
  IMS_PER_BATCH: 2
  USE_DIFF: False  # Do not use crowd objects
  BATCH_SIZE: 512
  SCALES: [800]
  MAX_SIZE: 1333
TEST:
-  DATABASE: '/data/coco_2014_minival_lmdb'
+  DATABASE: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  RPN_POST_NMS_TOP_N: 1000
......
@@ -23,14 +23,14 @@ FRCNN:
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/R-50.Affine.pth'
+  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 2
  BATCH_SIZE: 128
  SCALES: [600]
  MAX_SIZE: 1000
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  RPN_POST_NMS_TOP_N: 1000
  SCALES: [600]
......
@@ -28,15 +28,15 @@ FRCNN:
  ROI_XFORM_RESOLUTION: 7
  MLP_HEAD_DIM: 4096
TRAIN:
-  WEIGHTS: '/data/models/imagenet/VGG16.RCNN.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/VGG16.RCNN.pth'
+  DATABASE: '/data/voc_0712_trainval'
  RPN_MIN_SIZE: 16
  IMS_PER_BATCH: 2
  BATCH_SIZE: 128
  SCALES: [600]
  MAX_SIZE: 1000
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  RPN_MIN_SIZE: 16
  RPN_POST_NMS_TOP_N: 300
......
@@ -32,13 +32,13 @@ FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
-  DATABASE: '/data/coco_2014_trainval35k_lmdb'
+  WEIGHTS: '/model/R-50.Affine.pth'
+  DATABASE: '/data/coco_2014_trainval35k'
  IMS_PER_BATCH: 8
  SCALES: [400]
  MAX_SIZE: 666
TEST:
-  DATABASE: '/data/coco_2014_minival_lmdb'
+  DATABASE: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  IMS_PER_BATCH: 1
......
@@ -36,8 +36,8 @@ DROPBLOCK:
  DROP_ON: True
  DECREMENT: 0.000005  # * 20000 = 0.1
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
-  DATABASE: '/data/coco_2014_trainval35k_lmdb'
+  WEIGHTS: '/model/R-50.Affine.pth'
+  DATABASE: '/data/coco_2014_trainval35k'
  IMS_PER_BATCH: 8
  SCALES: [400]
  MAX_SIZE: 666
@@ -45,7 +45,7 @@ TRAIN:
  COLOR_JITTERING: True
  SCALE_RANGE: [0.75, 1.33]
TEST:
-  DATABASE: '/data/coco_2014_minival_lmdb'
+  DATABASE: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  IMS_PER_BATCH: 1
......
@@ -23,8 +23,8 @@ FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/AirNet.Affine.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/AirNet.Affine.pth'
+  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
@@ -32,7 +32,7 @@ TRAIN:
  SCALE_JITTERING: True
  COLOR_JITTERING: True
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [300]
......
@@ -24,8 +24,8 @@ FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-18.Affine.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/R-18.Affine.pth'
+  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
@@ -33,7 +33,7 @@ TRAIN:
  SCALE_JITTERING: True
  COLOR_JITTERING: True
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [300]
......
@@ -24,8 +24,8 @@ FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-34.Affine.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/R-34.Affine.pth'
+  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
@@ -33,7 +33,7 @@ TRAIN:
  SCALE_JITTERING: True
  COLOR_JITTERING: True
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [300]
......
@@ -29,11 +29,11 @@ SSD:
  STRIDES: [8, 16, 32]
  ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
-  WEIGHTS: '/data/models/imagenet/AirNet.Affine.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/AirNet.Affine.pth'
+  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 8
  NMS_TOP_K: 400
......
@@ -32,11 +32,11 @@ SSD:
  ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5, 3, 0.33], [1, 2, 0.5, 3, 0.33],
                  [1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
-  WEIGHTS: '/data/models/imagenet/VGG16.SSD.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/VGG16.SSD.pth'
+  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 8
  NMS_TOP_K: 400
......
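Every config file above follows the same two substitutions: `DATABASE` drops the `_lmdb` suffix and now points at a SeetaRecord directory, and pretrained `WEIGHTS` move from `/data/models/imagenet` to `/model`. These YAML files are merged into the defaults by `cfg_from_file` from `lib/core/config.py` (shown later in this changeset). A small sketch of how the changed keys are consumed; the YAML filename here is hypothetical, the keys come from the configs above:

```python
from lib.core.config import cfg, cfg_from_file

# Sketch: merge one of the updated YAML files into the default options and
# read back the two paths that changed in this commit.
cfg_from_file('configs/voc_faster_rcnn_R-50-FPN.yml')   # hypothetical filename
print(cfg.TRAIN.WEIGHTS)    # e.g. '/model/R-50.Affine.pth'
print(cfg.TRAIN.DATABASE)   # e.g. '/data/voc_0712_trainval' (SeetaRecord dir, no _lmdb)
```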
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import os.path as osp

sys.path.insert(0, '../../../')

from database.frcnn.utils.make_from_xml import make_db

if __name__ == '__main__':
    VOC_ROOT_DIR = '/home/workspace/datasets/VOC'

    # train database: voc_2007_trainval + voc_2012_trainval
    make_db(database_file=osp.join(VOC_ROOT_DIR, 'cache/voc_0712_trainval_lmdb'),
            images_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/JPEGImages'),
                         osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/JPEGImages')],
            annotations_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/Annotations'),
                              osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/Annotations')],
            imagesets_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
                            osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
            splits=['trainval', 'trainval'])

    # test database: voc_2007_test
    make_db(database_file=osp.join(VOC_ROOT_DIR, 'cache/voc_2007_test_lmdb'),
            images_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/JPEGImages'),
            annotations_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/Annotations'),
            imagesets_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
            splits=['test'])
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import time
import cv2

from dragon.tools.db import LMDB

sys.path.insert(0, '../../..')

from lib.proto import anno_pb2 as pb

ZFILL = 8
ENCODE_QUALITY = 95


def set_zfill(value):
    global ZFILL
    ZFILL = value


def set_quality(value):
    global ENCODE_QUALITY
    ENCODE_QUALITY = value


def make_datum(image_id, image_file, objects):
    anno_datum = pb.AnnotatedDatum()
    datum = pb.Datum()
    im = cv2.imread(image_file)
    datum.height, datum.width, datum.channels = im.shape
    datum.encoded = ENCODE_QUALITY != 100
    if datum.encoded:
        result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
    datum.data = im.tostring()
    anno_datum.datum.CopyFrom(datum)
    anno_datum.filename = image_id
    for ix, obj in enumerate(objects):
        anno = pb.Annotation()
        anno.x1, anno.y1, anno.x2, anno.y2 = obj['bbox']
        anno.name = obj['name']
        anno.difficult = obj['difficult']
        anno_datum.annotation.add().CopyFrom(anno)
    return anno_datum


def make_db(database_file, images_path, gt_recs, ext='.png'):
    if os.path.isdir(database_file) is True:
        raise ValueError('The database path already exists.')
    else:
        root_dir = database_file[:database_file.rfind('/')]
        if not os.path.exists(root_dir):
            os.makedirs(root_dir)
    print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
    db = LMDB(max_commit=10000)
    db.open(database_file, mode='w')
    count = 0
    total_line = len(gt_recs)
    start_time = time.time()
    zfill_flag = '{0:0%d}' % ZFILL
    for image_id, objects in gt_recs.items():
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()
        image_file = os.path.join(images_path, image_id + ext)
        datum = make_datum(image_id, image_file, objects)
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())
    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
    db.commit()
    db.close()
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
    print('The size of database is {0} MB.'.format(
        float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
\ No newline at end of file
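For reference, the removed `make_db` above consumed ground-truth records keyed by image id. The shape below is reconstructed from how `make_datum` indexes each object (`obj['bbox']`, `obj['name']`, `obj['difficult']`); the literal ids, boxes, and paths are only illustrative:

```python
from collections import OrderedDict

# Illustrative only: image_id -> list of objects, as indexed by make_datum above.
gt_recs = OrderedDict([
    ('000005', [
        {'bbox': (263, 211, 324, 339), 'name': 'chair', 'difficult': False},
        {'bbox': (165, 264, 253, 372), 'name': 'chair', 'difficult': True},
    ]),
])

# Hypothetical invocation of the removed helper:
# make_db(database_file='/data/cache/voc_demo_lmdb',
#         images_path='/data/VOCdevkit2007/VOC2007/JPEGImages',
#         gt_recs=gt_recs, ext='.jpg')
```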
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import time
import cv2
import xml.etree.ElementTree as ET

from dragon.tools.db import LMDB

sys.path.insert(0, '../../..')

from lib.proto import anno_pb2 as pb

ZFILL = 8
ENCODE_QUALITY = 95
# Collects the class names seen while building the database
class_name_set = set()


def set_zfill(value):
    global ZFILL
    ZFILL = value


def set_quality(value):
    global ENCODE_QUALITY
    ENCODE_QUALITY = value


def make_datum(image_file, xml_file):
    tree = ET.parse(xml_file)
    filename = os.path.split(xml_file)[-1]
    objs = tree.findall('object')
    anno_datum = pb.AnnotatedDatum()
    datum = pb.Datum()
    im = cv2.imread(image_file)
    if im is None or im.shape[0] == 0 or im.shape[1] == 0:
        print('Invalid image, ignored: ', xml_file)
        return None
    datum.height, datum.width, datum.channels = im.shape
    datum.encoded = ENCODE_QUALITY != 100
    if datum.encoded:
        result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
        if im is None or im.shape[0] == 0 or im.shape[1] == 0:
            print('Invalid image, ignored: ', xml_file)
            return None
    datum.data = im.tostring()
    anno_datum.datum.CopyFrom(datum)
    anno_datum.filename = filename.split('.')[0]
    if len(objs) == 0:
        return None
    for ix, obj in enumerate(objs):
        anno = pb.Annotation()
        bbox = obj.find('bndbox')
        x1 = float(bbox.find('xmin').text)
        y1 = float(bbox.find('ymin').text)
        x2 = float(bbox.find('xmax').text)
        y2 = float(bbox.find('ymax').text)
        cls = obj.find('name').text.strip()
        anno.x1, anno.y1, anno.x2, anno.y2 = (x1, y1, x2, y2)
        anno.name = cls
        class_name_set.add(cls)
        anno.difficult = False
        if obj.find('difficult') is not None:
            anno.difficult = int(obj.find('difficult').text) == 1
        anno_datum.annotation.add().CopyFrom(anno)
    return anno_datum


def make_db(
    database_file,
    images_path,
    annotations_path,
    imagesets_path,
    splits,
):
    if os.path.isdir(database_file) is True:
        print('Warning: The database path already exists.')
    else:
        root_dir = database_file[:database_file.rfind('/')]
        if not os.path.exists(root_dir):
            os.makedirs(root_dir)
    if not isinstance(images_path, list):
        images_path = [images_path]
    if not isinstance(annotations_path, list):
        annotations_path = [annotations_path]
    if not isinstance(imagesets_path, list):
        imagesets_path = [imagesets_path]
    assert len(splits) == len(imagesets_path)
    assert len(splits) == len(images_path)
    assert len(splits) == len(annotations_path)
    print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
    db = LMDB(max_commit=1000)
    db.open(database_file, mode='w')
    count = 0
    total_line = 0
    start_time = time.time()
    zfill_flag = '{0:0%d}' % ZFILL
    for db_idx, split in enumerate(splits):
        split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
        assert os.path.exists(split_file)
        with open(split_file, 'r') as f:
            lines = f.readlines()
        total_line += len(lines)
        for line in lines:
            filename = line.strip()
            image_file = os.path.join(images_path[db_idx], filename + '.jpg')
            xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
            datum = make_datum(image_file, xml_file)
            if datum is not None:
                count += 1
                db.put(zfill_flag.format(count - 1), datum.SerializeToString())
                if count % 1000 == 0:
                    now_time = time.time()
                    print('{0} / {1} in {2:.2f} sec'.format(
                        count, total_line, now_time - start_time))
                    db.commit()
    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
    db.commit()
    db.close()
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
    print('The size of database is {0} MB.'.format(
        float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
#    <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
#    <https://github.com/facebookresearch/Detectron/blob/master/lib/core/config.py>
#
# ------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os.path as osp
import numpy as np

from lib.utils.attrdict import AttrDict as edict

__C = edict()
cfg = __C

###########################################
#                                         #
#            Training Options             #
#                                         #
###########################################

__C.TRAIN = edict()

# Initialize network with weights from this file
__C.TRAIN.WEIGHTS = ''

# Database to train
__C.TRAIN.DATABASE = ''

# Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)

# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TRAIN.MAX_SIZE = 1000

# Images to use per mini-batch
__C.TRAIN.IMS_PER_BATCH = 1

# Minibatch size (number of regions of interest [ROIs])
__C.TRAIN.BATCH_SIZE = 128

# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25

# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5

# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0

# Use shuffle after each epoch
__C.TRAIN.USE_SHUFFLE = True

+# The number of chunks to shuffle
+__C.TRAIN.NUM_SHUFFLE_CHUNKS = 0

# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True

# Use the difficult(under occlusion) objects
__C.TRAIN.USE_DIFF = True

# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5

# If True, randomly scale the image by scale range
__C.TRAIN.SCALE_JITTERING = False
__C.TRAIN.SCALE_RANGE = [0.75, 1.0]

# If True, randomly distort the image by brightness, contrast, and saturation
__C.TRAIN.COLOR_JITTERING = False

# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor satisfied both positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TRAIN.RPN_MIN_SIZE = 0
# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
__C.TRAIN.RPN_STRADDLE_THRESH = 0

###########################################
#                                         #
#            Testing Options              #
#                                         #
###########################################

__C.TEST = edict()

# Database to test
__C.TEST.DATABASE = ''

# Original json ground-truth file to use
# Records in the Database file will be used instead
__C.TEST.JSON_FILE = ''

# Scales to use during testing (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,)

# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TEST.MAX_SIZE = 1000

# Images to use per mini-batch
__C.TEST.IMS_PER_BATCH = 1

# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3

# Use Soft-NMS instead of standard NMS?
# For the soft NMS overlap threshold, we simply use TEST.NMS
__C.TEST.USE_SOFT_NMS = False
__C.TEST.SOFT_NMS_METHOD = 'linear'
__C.TEST.SOFT_NMS_SIGMA = 0.5

# The top-k prior boxes before nms.
__C.TEST.NMS_TOP_K = 400

# The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05

# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5

# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 0

# Save detection results files if True
# If false, results files are cleaned up (they can be large) after local
# evaluation
__C.TEST.COMPETITION_MODE = True

# The optional test protocol for custom dataSet
# Ignored by VOC, COCO dataSets
# Available protocols: 'voc2007', 'voc2010', 'coco'
__C.TEST.PROTOCOL = 'voc2007'

# Maximum number of detections to return per image (100 is based on the limit
# established for the COCO dataset)
__C.TEST.DETECTIONS_PER_IM = 100

###########################################
#                                         #
#             Model Options               #
#                                         #
###########################################

__C.MODEL = edict()

# The type of the model
# ('faster_rcnn',
#  'mask_rcnn',
#  'ssd',
#  'rssd',
#  'retinanet',
# )
__C.MODEL.TYPE = ''

# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE = 'FLOAT32'

# The backbone
__C.MODEL.BACKBONE = ''

# The number of classes in the dataset
__C.MODEL.NUM_CLASSES = -1

# Keep it for TaaS DataSet
__C.MODEL.CLASSES = ['__background__']

# Add StopGrad at a specified stage so the bottom layers are frozen
__C.MODEL.FREEZE_AT = 2

# Whether to use focal loss for one-stage detectors?
# Enabled if model type in ('ssd',)
# Retinanet is forced to use focal loss
__C.MODEL.USE_FOCAL_LOSS = False
__C.MODEL.FOCAL_LOSS_ALPHA = 0.25
__C.MODEL.FOCAL_LOSS_GAMMA = 2.0

# Stride of the coarsest Feature level
# This is needed so the input can be padded properly
__C.MODEL.COARSEST_STRIDE = -1

###########################################
#                                         #
#              RPN Options                #
#                                         #
###########################################

__C.RPN = edict()

# Strides for multiple rpn heads
__C.RPN.STRIDES = [4, 8, 16, 32, 64]

# Scales for multiple anchors
__C.RPN.SCALES = [8, 8, 8, 8, 8]

# RPN anchor aspect ratios
__C.RPN.ASPECT_RATIOS = [0.5, 1, 2]

###########################################
#                                         #
#          Retina-Net Options             #
#                                         #
###########################################

__C.RETINANET = edict()

# Anchor aspect ratios to use
__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)

# Anchor scales per octave
__C.RETINANET.SCALES_PER_OCTAVE = 3

# At each FPN level, we generate anchors based on their scale, aspect_ratio,
# stride of the level, and we multiply the resulting anchor by ANCHOR_SCALE
__C.RETINANET.ANCHOR_SCALE = 4

# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
__C.RETINANET.NUM_CONVS = 4

# During inference, #locs to select based on cls score before NMS is performed
__C.RETINANET.PRE_NMS_TOP_N = 5000

# IoU overlap ratio for labeling an anchor as positive
# Anchors with >= iou overlap are labeled positive
__C.RETINANET.POSITIVE_OVERLAP = 0.5

# IoU overlap ratio for labeling an anchor as negative
# Anchors with < iou overlap are labeled negative
__C.RETINANET.NEGATIVE_OVERLAP = 0.4

###########################################
#                                         #
#              FPN Options                #
#                                         #
###########################################

__C.FPN = edict()

# Channel dimension of the FPN feature levels
__C.FPN.DIM = 256

# Coarsest level of the FPN pyramid
__C.FPN.RPN_MAX_LEVEL = 6
# Finest level of the FPN pyramid
__C.FPN.RPN_MIN_LEVEL = 2

# Hyper-Parameters for the RoI-to-FPN level mapping heuristic
__C.FPN.ROI_CANONICAL_SCALE = 224
__C.FPN.ROI_CANONICAL_LEVEL = 4
# Coarsest level of the FPN pyramid
__C.FPN.ROI_MAX_LEVEL = 5
# Finest level of the FPN pyramid
__C.FPN.ROI_MIN_LEVEL = 2

###########################################
#                                         #
#          Fast R-CNN Options             #
#                                         #
###########################################

__C.FRCNN = edict()

# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.FRCNN.ROI_XFORM_METHOD = 'RoIPool'

# Hidden layer dimension when using an MLP for the RoI box head
__C.FRCNN.MLP_HEAD_DIM = 1024

# RoI transform output resolution
# Note: some models may have constraints on what they can use, e.g. they use
# pretrained FC layers like in VGG16, and will ignore this option
__C.FRCNN.ROI_XFORM_RESOLUTION = 7

###########################################
#                                         #
#          Mask R-CNN Options             #
#                                         #
###########################################

__C.MRCNN = edict()

# Resolution of mask predictions
__C.MRCNN.RESOLUTION = 28

# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign'

# RoI transform output resolution
__C.MRCNN.ROI_XFORM_RESOLUTION = 14

###########################################
#                                         #
#              SSD Options                #
#                                         #
###########################################

__C.SSD = edict()

# Whether to enable FPN enhancement?
__C.SSD.FPN_ON = False

__C.SSD.MULTIBOX = edict()
# MultiBox configs
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
__C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []

__C.SSD.OHEM = edict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
# The ratio used in hard example mining
__C.SSD.OHEM.NEG_POS_RATIO = 3.0

# Distort the image?
__C.SSD.DISTORT = edict()
__C.SSD.DISTORT.BRIGHTNESS_PROB = 0.5
__C.SSD.DISTORT.CONTRAST_PROB = 0.5
__C.SSD.DISTORT.SATURATION_PROB = 0.5

# Expand the image?
__C.SSD.EXPAND = edict()
__C.SSD.EXPAND.PROB = 0.5
__C.SSD.EXPAND.MAX_RATIO = 4.0

# Resize the image?
__C.SSD.RESIZE = edict()
__C.SSD.RESIZE.HEIGHT = 300
__C.SSD.RESIZE.WIDTH = 300
__C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']

# Samplers
# Format as (min_scale, max_scale,
#            min_aspect_ratio, max_aspect_ratio,
#            min_jaccard_overlap, max_jaccard_overlap,
#            max_trials, max_sample)
__C.SSD.SAMPLERS = [
    (1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1, 1),   # Entire image
    (0.3, 1.0, 0.5, 2.0, 0.1, 1.0, 10, 1),  # IoU >= 0.1
    (0.3, 1.0, 0.5, 2.0, 0.3, 1.0, 10, 1),  # IoU >= 0.3
    (0.3, 1.0, 0.5, 2.0, 0.5, 1.0, 5, 1),   # IoU >= 0.5
    (0.3, 1.0, 0.5, 2.0, 0.7, 1.0, 5, 1),   # IoU >= 0.7
    (0.3, 1.0, 0.5, 2.0, 0.9, 1.0, 5, 1),   # IoU >= 0.9
    (0.3, 1.0, 0.5, 2.0, 0.0, 1.0, 1, 1),   # Any patches
]

###########################################
#                                         #
#            ResNet Options               #
#                                         #
###########################################

__C.RESNET = edict()

# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
__C.RESNET.NUM_GROUPS = 1

# Baseline width of each group
__C.RESNET.GROUP_WIDTH = 64

###########################################
#                                         #
#           DropBlock Options             #
#                                         #
###########################################

__C.DROPBLOCK = edict()

# Whether to use drop block for more regularization
__C.DROPBLOCK.DROP_ON = False

# Decrement for scheduling keep prob after each iteration
__C.DROPBLOCK.DECREMENT = 1e-6

###########################################
#                                         #
#            Solver Options               #
#                                         #
###########################################

__C.SOLVER = edict()

# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001

# Optional scaling factor for total loss
# This option is helpful to scale the magnitude
# of gradients during FP16 training
__C.SOLVER.LOSS_SCALING = 1.

# Schedule type (see functions in utils.lr_policy for options)
# E.g., 'step', 'steps_with_decay', ...
__C.SOLVER.LR_POLICY = 'steps_with_decay'

# Hyperparameter used by the specified policy
# For 'step', the current LR is multiplied by SOLVER.GAMMA at each step
__C.SOLVER.GAMMA = 0.1

# Uniform step size for 'steps' policy
__C.SOLVER.STEP_SIZE = 30000
__C.SOLVER.STEPS = []

# Maximum number of SGD iterations
__C.SOLVER.MAX_ITERS = 40000

# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9

# L2 regularization hyper parameters
__C.SOLVER.WEIGHT_DECAY = 0.0005

# L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = -1.0

# Warm up to SOLVER.BASE_LR over this number of SGD iterations
__C.SOLVER.WARM_UP_ITERS = 500

# Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR
__C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0

# The steps for accumulating gradients
__C.SOLVER.ITER_SIZE = 1

# The interval to display logs
__C.SOLVER.DISPLAY = 20

# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_ITERS = 5000

# prefix to yield the path: <prefix>_iters_XYZ.caffemodel
__C.SOLVER.SNAPSHOT_PREFIX = ''

###########################################
#                                         #
#             Misc Options                #
#                                         #
###########################################

# Number of GPUs to use (applies to both training and testing)
__C.NUM_GPUS = 1

# Use NCCL for all reduce, otherwise use cuda-aware mpi
__C.USE_NCCL = True

# Hosts for Inter-Machine communication
__C.HOSTS = []

# Pixel mean values (BGR order)
__C.PIXEL_MEANS = [102., 115., 122.]

# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
__C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)

# Default weights on (dx, dy, dw, dh, da) for normalizing rbox regression targets
__C.RBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0, 10.0)

# Prior prob for the positives at the beginning of training.
# This is used to set the bias init for the logits layer
__C.PRIOR_PROB = 0.01

# For reproducibility
__C.RNG_SEED = 3

# Root directory of project
__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))

# Data directory
__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))

# Place outputs under an experiments directory
__C.EXP_DIR = ''

# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True

# Default GPU device id
__C.GPU_ID = 0

# Dump detection visualizations
__C.VIS = False
__C.VIS_ON_FILE = False

# Score threshold for visualization
__C.VIS_TH = 0.7

# Write summaries by tensor board
__C.ENABLE_TENSOR_BOARD = False


def _merge_a_into_b(a, b):
    """Merge config dictionary a into config dictionary b, clobbering the
    options in b whenever they are also specified in a.
    """
    if not isinstance(a, dict):
        return
    for k, v in a.items():
        # a must specify keys that are in b
        if k not in b:
            raise KeyError('{} is not a valid config key'.format(k))
        # the types must match, too
        v = _check_and_coerce_cfg_value_type(v, b[k], k)
        # recursively merge dicts
        if type(v) is edict:
            try:
                _merge_a_into_b(a[k], b[k])
            except:
                print('Error under config key: {}'.format(k))
                raise
        else:
            b[k] = v


def cfg_from_file(filename):
    """Load a config file and merge it into the default options."""
    import yaml
    with open(filename, 'r') as f:
        yaml_cfg = edict(yaml.load(f))
    global __C
    _merge_a_into_b(yaml_cfg, __C)


def cfg_from_list(cfg_list):
    """Set config keys via list (e.g., from command line)."""
    from ast import literal_eval
    assert len(cfg_list) % 2 == 0
    for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
        key_list = k.split('.')
        d = __C
        for subkey in key_list[:-1]:
            assert subkey in d
            d = d[subkey]
        subkey = key_list[-1]
        assert subkey in d
        try:
            value = literal_eval(v)
        except:
            # Handle the case when v is a string literal
            value = v
        assert type(value) == type(d[subkey]), \
            'type {} does not match original type {}'\
            .format(type(value), type(d[subkey]))
        d[subkey] = value


def _check_and_coerce_cfg_value_type(value_a, value_b, key):
    """Checks that `value_a`, which is intended to replace `value_b` is of the
    right type. The type is correct if it matches exactly or is one of a few
    cases in which the type can be easily coerced.
    """
    # The types must match (with some exceptions)
    type_b = type(value_b)
    type_a = type(value_a)
    if type_a is type_b:
        return value_a
    if type_b is float and type_a is int:
        return float(value_a)

    # Exceptions: numpy arrays, strings, tuple<->list
    if isinstance(value_b, np.ndarray):
        value_a = np.array(value_a, dtype=value_b.dtype)
    elif isinstance(value_a, tuple) and isinstance(value_b, list):
        value_a = list(value_a)
    elif isinstance(value_a, list) and isinstance(value_b, tuple):
        value_a = tuple(value_a)
    elif isinstance(value_a, dict) and isinstance(value_b, edict):
        value_a = edict(value_a)
    else:
        raise ValueError(
            'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
            'key: {}'.format(type_b, type_a, value_b, value_a, key)
        )
    return value_a
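Both override helpers defined above can be exercised directly. A small usage sketch; the key/value pairs are arbitrary examples, not values used by the repository:

```python
from lib.core.config import cfg, cfg_from_list

# cfg_from_list expects a flat [key1, value1, key2, value2, ...] list,
# typically collected from argparse; values are parsed with literal_eval
# and must keep the type of the default they override.
cfg_from_list(['TRAIN.IMS_PER_BATCH', '4',
               'SOLVER.BASE_LR', '0.002',
               'MODEL.NUM_CLASSES', '21'])
assert cfg.TRAIN.IMS_PER_BATCH == 4
```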
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
import shutil import shutil
import time import time
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.config import cfg_from_file from lib.core.config import cfg_from_file
class Coordinator(object): class Coordinator(object):
"""Coordinator is a simple tool to manage the """Coordinator is a simple tool to manage the
unique experiments from the YAML configurations. unique experiments from the YAML configurations.
""" """
def __init__(self, cfg_file, exp_dir=None): def __init__(self, cfg_file, exp_dir=None):
# Override the default configs # Override the default configs
cfg_from_file(cfg_file) cfg_from_file(cfg_file)
if cfg.EXP_DIR != '': if cfg.EXP_DIR != '':
exp_dir = cfg.EXP_DIR exp_dir = cfg.EXP_DIR
if exp_dir is None: if exp_dir is None:
model_id = time.strftime( model_id = time.strftime(
'%Y%m%d_%H%M%S', time.localtime(time.time())) '%Y%m%d_%H%M%S', time.localtime(time.time()))
self.experiment_dir = '../experiments/{}'.format(model_id) self.experiment_dir = '../experiments/{}'.format(model_id)
if not os.path.exists(self.experiment_dir): if not os.path.exists(self.experiment_dir):
os.makedirs(self.experiment_dir) os.makedirs(self.experiment_dir)
else: else:
if not os.path.exists(exp_dir): if not os.path.exists(exp_dir):
raise ValueError('ExperimentDir({}) does not exist.'.format(exp_dir)) raise ValueError('ExperimentDir({}) does not exist.'.format(exp_dir))
self.experiment_dir = exp_dir self.experiment_dir = exp_dir
def _path_at(self, file, auto_create=True): def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file)) path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path): if auto_create and not os.path.exists(path):
os.makedirs(path) os.makedirs(path)
return path return path
def checkpoints_dir(self): def checkpoints_dir(self):
return self._path_at('checkpoints') return self._path_at('checkpoints')
def exports_dir(self): def exports_dir(self):
return self._path_at('exports') return self._path_at('exports')
def results_dir(self, checkpoint=None): def results_dir(self, checkpoint=None):
sub_dir = os.path.splitext(os.path.basename(checkpoint))[0] if checkpoint else '' sub_dir = os.path.splitext(os.path.basename(checkpoint))[0] if checkpoint else ''
return self._path_at(os.path.join('results', sub_dir)) return self._path_at(os.path.join('results', sub_dir))
def checkpoint(self, global_step=None, wait=True): def checkpoint(self, global_step=None, wait=True):
def locate(): def locate():
files = os.listdir(self.checkpoints_dir()) files = os.listdir(self.checkpoints_dir())
steps = [] steps = []
for ix, file in enumerate(files): for ix, file in enumerate(files):
step = int(file.split('_iter_')[-1].split('.')[0]) step = int(file.split('_iter_')[-1].split('.')[0])
if global_step == step: if global_step == step:
return os.path.join(self.checkpoints_dir(), files[ix]), step return os.path.join(self.checkpoints_dir(), files[ix]), step
steps.append(step) steps.append(step)
if global_step is None: if global_step is None:
if len(files) == 0: if len(files) == 0:
return None, 0 return None, 0
last_idx = int(np.argmax(steps)) last_idx = int(np.argmax(steps))
last_step = steps[last_idx] last_step = steps[last_idx]
return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step
return None, 0 return None, 0
result = locate() result = locate()
while result[0] is None and wait: while result[0] is None and wait:
print('\rWaiting for the checkpoint at step {} to exist...'.format(global_step), end='') print('\rWaiting for the checkpoint at step {} to exist...'.format(global_step), end='')
time.sleep(10) time.sleep(10)
result = locate() result = locate()
return result return result
def delete_experiment(self): def delete_experiment(self):
if os.path.exists(self.experiment_dir): if os.path.exists(self.experiment_dir):
shutil.rmtree(self.experiment_dir) shutil.rmtree(self.experiment_dir)
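# --- Editor's note: a minimal, hedged usage sketch of the Coordinator above.
# The YAML path is a placeholder, not a file shipped with this repository.
# checkpoint(wait=False) returns (None, 0) until a '<prefix>_iter_<step>.pth'
# snapshot has been written under <experiment_dir>/checkpoints.
if __name__ == '__main__':
    coordinator = Coordinator('configs/faster_rcnn.yml')
    print('experiment dir :', coordinator.experiment_dir)
    print('checkpoints dir:', coordinator.checkpoints_dir())
    last_file, last_step = coordinator.checkpoint(wait=False)
    print('latest snapshot:', last_file, '@ step', last_step)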
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import collections
import cv2 import multiprocessing as mp
from multiprocessing import Queue import os
from collections import OrderedDict
import cv2
from lib.core.config import cfg import dragon
from lib.datasets.factory import get_imdb
# All detectors share the same reader/transformer during testing from lib.core.config import cfg
from lib.faster_rcnn.data.data_reader import DataReader from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data.data_transformer import DataTransformer from lib.faster_rcnn.data_transformer import DataTransformer
class TestServer(object): class TestServer(object):
def __init__(self, output_dir): def __init__(self, output_dir):
self.imdb = get_imdb(cfg.TEST.DATABASE) self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE) self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.num_images, self.num_classes, self.classes = \ self.num_images, self.num_classes, self.classes = \
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = DataReader(**{'source': self.imdb.source}) self.data_reader = dragon.io.DataReader(
self.data_transformer = DataTransformer() dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_reader.q_out = Queue(cfg.TEST.IMS_PER_BATCH) self.data_transformer = DataTransformer()
self.data_reader.start() self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH)
self.gt_recs = OrderedDict() self.data_reader.start()
self.output_dir = output_dir self.gt_recs = collections.OrderedDict()
if cfg.VIS_ON_FILE: self.output_dir = output_dir
self.vis_dir = os.path.join(self.output_dir, 'vis') if cfg.VIS_ON_FILE:
if not os.path.exists(self.vis_dir): self.vis_dir = os.path.join(self.output_dir, 'vis')
os.makedirs(self.vis_dir) if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls() def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
serialized = self.data_reader.q_out.get() def get_image(self):
image = self.data_transformer.get_image(serialized) example = self.data_reader.q_out.get()
image_id, objects = self.data_transformer.get_annotations(serialized) image = self.data_transformer.get_image(example)
self.gt_recs[image_id] = { image_id, objects = self.data_transformer.get_annotations(example)
'objects': objects, self.gt_recs[image_id] = {
'width': image.shape[1], 'objects': objects,
'height': image.shape[0], 'width': image.shape[1],
} 'height': image.shape[0],
return image_id, image }
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \ def get_save_filename(self, image_id, ext='.jpg'):
if cfg.VIS_ON_FILE else None return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images: def get_records(self):
raise RuntimeError( if len(self.gt_recs) != self.num_images:
'Loaded {} records, but {} are required.' raise RuntimeError(
.format(len(self.gt_recs), self.num_images), 'Loaded {} records, but {} are required.'
) .format(len(self.gt_recs), self.num_images),
return self.gt_recs )
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections( def evaluate_detections(self, all_boxes):
all_boxes, self.get_records(), self.output_dir) self.imdb.evaluate_detections(
all_boxes,
def evaluate_segmentations(self, all_boxes, all_masks): self.get_records(),
self.imdb.evaluate_segmentations( self.output_dir,
all_boxes, all_masks, self.get_records(), self.output_dir) )
def evaluate_segmentations(self, all_boxes, all_masks):
class InferServer(object): self.imdb.evaluate_segmentations(
def __init__(self, output_dir): all_boxes,
self.images_dir = cfg.TEST.DATABASE all_masks,
self.imdb = get_imdb('taas:/empty') self.get_records(),
self.images = os.listdir(self.images_dir) self.output_dir,
self.num_images, self.num_classes, self.classes = \ )
len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES
self.data_transformer = DataTransformer()
self.gt_recs = OrderedDict() class InferServer(object):
self.output_dir = output_dir def __init__(self, output_dir):
self.image_idx = 0 self.images_dir = cfg.TEST.DATABASE
if cfg.VIS_ON_FILE: self.imdb = get_imdb('taas:/empty')
self.vis_dir = os.path.join(self.output_dir, 'vis') self.images = os.listdir(self.images_dir)
if not os.path.exists(self.vis_dir): self.num_images, self.num_classes, self.classes = \
os.makedirs(self.vis_dir) len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES
self.data_transformer = DataTransformer()
def set_transformer(self, transformer_cls): self.gt_recs = collections.OrderedDict()
self.data_transformer = transformer_cls() self.output_dir = output_dir
self.image_idx = 0
def get_image(self): if cfg.VIS_ON_FILE:
image_name = self.images[self.image_idx] self.vis_dir = os.path.join(self.output_dir, 'vis')
image_id = image_name.split('.')[0] if not os.path.exists(self.vis_dir):
image = cv2.imread(os.path.join(self.images_dir, image_name)) os.makedirs(self.vis_dir)
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = { def set_transformer(self, transformer_cls):
'width': image.shape[1], self.data_transformer = transformer_cls()
'height': image.shape[0],
} def get_image(self):
return image_id, image image_name = self.images[self.image_idx]
image_id = image_name.split('.')[0]
def get_save_filename(self, image_id, ext='.jpg'): image = cv2.imread(os.path.join(self.images_dir, image_name))
return os.path.join(self.vis_dir, image_id + ext) \ self.image_idx = (self.image_idx + 1) % self.num_images
if cfg.VIS_ON_FILE else None self.gt_recs[image_id] = {'width': image.shape[1], 'height': image.shape[0]}
return image_id, image
def get_records(self):
if len(self.gt_recs) != self.num_images: def get_save_filename(self, image_id, ext='.jpg'):
raise RuntimeError( return os.path.join(self.vis_dir, image_id + ext) \
'Loaded {} records, but {} are required.' if cfg.VIS_ON_FILE else None
.format(len(self.gt_recs), self.num_images),
) def get_records(self):
return self.gt_recs if len(self.gt_recs) != self.num_images:
raise RuntimeError(
def evaluate_detections(self, all_boxes): 'Loaded {} records, but {} are required.'
self.imdb.evaluate_detections( .format(len(self.gt_recs), self.num_images),
all_boxes, )
self.get_records(), return self.gt_recs
self.output_dir,
) def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
def evaluate_segmentations(self, all_boxes, all_masks): all_boxes,
self.imdb.evaluate_segmentations( self.get_records(),
all_boxes, self.output_dir,
all_masks, )
self.get_records(),
self.output_dir, def evaluate_segmentations(self, all_boxes, all_masks):
) self.imdb.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
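# --- Editor's note: a hedged sketch of how a test loop could drive the
# TestServer above; it is not the project's actual test driver. 'run_detector'
# is a hypothetical callable returning per-class [x1, y1, x2, y2, score] arrays.
def run_test_loop(output_dir, run_detector):
    server = TestServer(output_dir)
    # all_boxes[cls][i] holds the detections of class `cls` on the i-th image.
    all_boxes = [[[] for _ in range(server.num_images)]
                 for _ in range(server.num_classes)]
    for i in range(server.num_images):
        image_id, image = server.get_image()
        boxes_per_class = run_detector(image)
        for cls_ind in range(1, server.num_classes):
            all_boxes[cls_ind][i] = boxes_per_class[cls_ind]
    # get_records() requires every image to have been pulled exactly once.
    server.evaluate_detections(all_boxes)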
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/train.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/train.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import datetime import datetime
import os import os
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.solver import get_solver_func from lib.core.solver import get_solver_func
from lib.utils import logger from lib.utils import logger
from lib.utils.stats import SmoothedValue from lib.utils.stats import SmoothedValue
from lib.utils.timer import Timer from lib.utils.timer import Timer
class SolverWrapper(object): class SolverWrapper(object):
def __init__(self, coordinator): def __init__(self, coordinator):
self.output_dir = coordinator.checkpoints_dir() self.output_dir = coordinator.checkpoints_dir()
self.solver = get_solver_func('MomentumSGD')() self.solver = get_solver_func('MomentumSGD')()
# Load the pre-trained weights # Load the pre-trained weights
init_weights = cfg.TRAIN.WEIGHTS init_weights = cfg.TRAIN.WEIGHTS
if init_weights != '': if init_weights != '':
if os.path.exists(init_weights): if os.path.exists(init_weights):
logger.info('Loading weights from {}.'.format(init_weights)) logger.info('Loading weights from {}.'.format(init_weights))
self.solver.detector.load_weights(init_weights) self.solver.detector.load_weights(init_weights)
else: else:
raise ValueError('Invalid path of weights: {}'.format(init_weights)) raise ValueError('Invalid path of weights: {}'.format(init_weights))
# Mixed precision training? # Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16': if cfg.MODEL.DATA_TYPE.lower() == 'float16':
self.solver.detector.half() # Powerful FP16 Support self.solver.detector.half() # Powerful FP16 Support
self.solver.detector.cuda(cfg.GPU_ID) self.solver.detector.cuda(cfg.GPU_ID)
# Plan the metrics # Plan the metrics
self.metrics = collections.OrderedDict() self.metrics = collections.OrderedDict()
if cfg.ENABLE_TENSOR_BOARD: if cfg.ENABLE_TENSOR_BOARD:
from dragon.tools.tensorboard import TensorBoard from dragon.tools.tensorboard import TensorBoard
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs') self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs')
def snapshot(self): def snapshot(self):
if not logger.is_root(): if not logger.is_root():
return None return None
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}' filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth') .format(self.solver.iter) + '.pth')
filename = os.path.join(self.output_dir, filename) filename = os.path.join(self.output_dir, filename)
torch.save(self.solver.detector.state_dict(), filename) torch.save(self.solver.detector.state_dict(), filename)
logger.info('Wrote snapshot to: {:s}'.format(filename)) logger.info('Wrote snapshot to: {:s}'.format(filename))
return filename return filename
def add_metrics(self, stats): def add_metrics(self, stats):
for k, v in stats['loss'].items(): for k, v in stats['loss'].items():
if k not in self.metrics: if k not in self.metrics:
self.metrics[k] = SmoothedValue(20) self.metrics[k] = SmoothedValue(20)
self.metrics[k].AddValue(v) self.metrics[k].AddValue(v)
def send_metrics(self, stats): def send_metrics(self, stats):
if hasattr(self, 'board'): if hasattr(self, 'board'):
self.board.scalar_summary('lr', stats['lr'], stats['iter']) self.board.scalar_summary('lr', stats['lr'], stats['iter'])
self.board.scalar_summary('time', stats['time'], stats['iter']) self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items(): for k, v in self.metrics.items():
if k == 'total': if k == 'total':
self.board.scalar_summary( self.board.scalar_summary(
'total_loss', 'total_loss',
v.GetMedianValue(), v.GetMedianValue(),
stats['iter'], stats['iter'],
) )
else: else:
self.board.scalar_summary( self.board.scalar_summary(
k, k,
v.GetMedianValue(), v.GetMedianValue(),
stats['iter'], stats['iter'],
) )
def step(self, display=False): def step(self, display=False):
stats = self.solver.one_step() stats = self.solver.one_step()
self.add_metrics(stats) self.add_metrics(stats)
self.send_metrics(stats) self.send_metrics(stats)
if display: if display:
logger.info( logger.info(
'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % ( 'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (
stats['iter'], stats['lr'], stats['iter'], stats['lr'],
self.metrics['total'].GetMedianValue(), self.metrics['total'].GetMedianValue(),
stats['time'], stats['time'],
) )
) )
for k, v in self.metrics.items(): for k, v in self.metrics.items():
if k == 'total': if k == 'total':
continue continue
logger.info(' ' * 10 + 'Train net output({}): {}' logger.info(' ' * 10 + 'Train net output({}): {}'
.format(k, v.GetMedianValue())) .format(k, v.GetMedianValue()))
def train_model(self): def train_model(self):
"""Network training loop.""" """Network training loop."""
last_snapshot_iter = -1 last_snapshot_iter = -1
timer = Timer() timer = Timer()
model_paths = [] model_paths = []
start_lr = self.solver.base_lr start_lr = self.solver.base_lr
while self.solver.iter < cfg.SOLVER.MAX_ITERS: while self.solver.iter < cfg.SOLVER.MAX_ITERS:
if self.solver.iter < cfg.SOLVER.WARM_UP_ITERS: if self.solver.iter < cfg.SOLVER.WARM_UP_ITERS:
alpha = (self.solver.iter + 1.0) / cfg.SOLVER.WARM_UP_ITERS alpha = (self.solver.iter + 1.0) / cfg.SOLVER.WARM_UP_ITERS
self.solver.base_lr = \ self.solver.base_lr = \
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha) start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
# Apply 1-step SGD update # Apply 1-step SGD update
with timer.tic_and_toc(): with timer.tic_and_toc():
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0) self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0)
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0: if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time average_time = timer.average_time
eta_seconds = average_time * ( eta_seconds = average_time * (
cfg.SOLVER.MAX_ITERS - self.solver.iter) cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds))) eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
logger.info( logger.info(
'< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >' '< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta) .format(progress, timer.average_time, eta)
) )
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0: if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0:
last_snapshot_iter = self.solver.iter last_snapshot_iter = self.solver.iter
model_paths.append(self.snapshot()) model_paths.append(self.snapshot())
if last_snapshot_iter != self.solver.iter: if last_snapshot_iter != self.solver.iter:
model_paths.append(self.snapshot()) model_paths.append(self.snapshot())
return model_paths return model_paths
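# --- Editor's note: a self-contained sketch of the linear warm-up schedule
# applied in train_model() above; the default numbers here are illustrative,
# not the project's configuration values.
def _warm_up_lr(step, base_lr=0.01, warm_up_iters=500, warm_up_factor=1.0 / 3.0):
    # Ramps linearly from base_lr * warm_up_factor up to base_lr.
    if step >= warm_up_iters:
        return base_lr
    alpha = (step + 1.0) / warm_up_iters
    return base_lr * (warm_up_factor * (1.0 - alpha) + alpha)

# e.g. _warm_up_lr(0) ~= 0.0033, _warm_up_lr(249) ~= 0.0067, _warm_up_lr(499) == 0.01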
def train_net(coordinator, start_iter=0): def train_net(coordinator, start_iter=0):
sw = SolverWrapper(coordinator) sw = SolverWrapper(coordinator)
sw.solver.iter = start_iter sw.solver.iter = start_iter
logger.info('Solving...') logger.info('Solving...')
model_paths = sw.train_model() model_paths = sw.train_model()
return model_paths return model_paths
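# --- Editor's note: a hedged sketch of the end-to-end call chain; the config
# path is a placeholder. train_model() snapshots weights as
# '<SOLVER.SNAPSHOT_PREFIX>_iter_<step>.pth', the same pattern that
# Coordinator.checkpoint() parses back when a test process waits for a step.
#
#   coordinator = Coordinator('configs/faster_rcnn.yml')
#   model_paths = train_net(coordinator, start_iter=0)
#   latest_path, latest_step = coordinator.checkpoint(wait=False)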
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/factory.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/factory.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
from lib.datasets.taas import TaaS from lib.datasets.taas import TaaS
# TaaS DataSet # TaaS DataSet
_GLOBAL_DATA_SETS = {'taas': lambda source: TaaS(source)} _GLOBAL_DATA_SETS = {'taas': lambda source: TaaS(source)}
def get_imdb(name): def get_imdb(name):
"""Get an imdb (image database) by name.""" """Get an imdb (image database) by name."""
keys = name.split(':') keys = name.split(':')
if len(keys) >= 2: if len(keys) >= 2:
cls, source = keys[0], ':'.join(keys[1:]) cls, source = keys[0], ':'.join(keys[1:])
if cls not in _GLOBAL_DATA_SETS: if cls not in _GLOBAL_DATA_SETS:
raise KeyError('Unknown DataSet: {}'.format(cls)) raise KeyError('Unknown DataSet: {}'.format(cls))
return _GLOBAL_DATA_SETS[cls](source) return _GLOBAL_DATA_SETS[cls](source)
elif os.path.exists(name): elif os.path.exists(name):
return _GLOBAL_DATA_SETS['taas'](name) return _GLOBAL_DATA_SETS['taas'](name)
else: else:
raise ValueError('Illegal Database: {}'.format(name)) raise ValueError('Illegal Database: {}'.format(name))
def list_imdbs(): def list_imdbs():
"""List all registered imdbs.""" """List all registered imdbs."""
return _GLOBAL_DATA_SETS.keys() return _GLOBAL_DATA_SETS.keys()
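# --- Editor's note: hedged usage examples for get_imdb(); the record paths
# below are placeholders rather than datasets shipped with the repository.
#
#   get_imdb('taas:/data/voc_0712_trainval')   # explicit '<class>:<source>' form
#   get_imdb('/data/coco_2014_minival')        # bare existing path -> 'taas' class
#   list_imdbs()                               # -> dict_keys(['taas'])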
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/imdb.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/imdb.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
import os import os
from dragon.tools.db import LMDB import dragon
from lib.core.config import cfg from lib.core.config import cfg
class imdb(object): class imdb(object):
def __init__(self, name): def __init__(self, name):
self._name = name self._name = name
self._num_classes = 0 self._num_classes = 0
self._classes = [] self._classes = []
@property @property
def name(self): def name(self):
return self._name return self._name
@property @property
def num_classes(self): def num_classes(self):
return len(self._classes) return len(self._classes)
@property @property
def classes(self): def classes(self):
return self._classes return self._classes
@property @property
def cache_path(self): def cache_path(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache')) cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path): if not os.path.exists(cache_path):
os.makedirs(cache_path) os.makedirs(cache_path)
return cache_path return cache_path
@property @property
def source(self): def source(self):
excepted_source = os.path.join(self.cache_path, self.name + '_lmdb') excepted_source = os.path.join(self.cache_path, self.name)
if not os.path.exists(excepted_source): if not os.path.exists(excepted_source):
raise RuntimeError('Expected LMDB source from: {}, ' raise RuntimeError(
'but it does not exist.'.format(excepted_source)) 'Expected source from: {}, '
return excepted_source 'but it does not exist.'
.format(excepted_source)
@property )
def num_images(self): return excepted_source
self._db = LMDB()
self._db.open(self.source) @property
num_entries = self._db.num_entries() def num_images(self):
self._db.close() return dragon.io.SeetaRecordDataset(self.source).size
return num_entries
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
def evaluate_detections(self, all_boxes, gt_recs, output_dir): pass
pass
def evaluate_masks(self, all_boxes, all_masks, output_dir):
def evaluate_masks(self, all_boxes, all_masks, output_dir): pass
pass
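# --- Editor's note: a hedged sketch of how the base class resolves its data;
# 'my_dataset' is a placeholder record folder under <cfg.DATA_DIR>/cache.
#
#   db = imdb('my_dataset')
#   db.source       # -> <cfg.DATA_DIR>/cache/my_dataset (raises if missing)
#   db.num_images   # -> dragon.io.SeetaRecordDataset(db.source).size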
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import json
import sys import os
import json import sys
import numpy as np import uuid
import uuid
import cv2 import cv2
try: import numpy as np
import cPickle try:
except: import cPickle
import pickle as cPickle except:
from .imdb import imdb import pickle as cPickle
from .voc_eval import voc_bbox_eval, voc_segm_eval
from lib.core.config import cfg
from lib.core.config import cfg from lib.datasets.imdb import imdb
from lib.utils import boxes as box_utils from lib.datasets.voc_eval import voc_bbox_eval
from lib.pycocotools.mask import encode as encode_masks from lib.datasets.voc_eval import voc_segm_eval
from lib.pycocotools.mask import encode as encode_masks
from lib.utils import boxes as box_utils
class TaaS(imdb):
def __init__(self, source):
imdb.__init__(self, 'taas') class TaaS(imdb):
self._classes = cfg.MODEL.CLASSES def __init__(self, source):
self._source = source imdb.__init__(self, 'taas')
self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) self._classes = cfg.MODEL.CLASSES
self._class_to_cat_id = self._class_to_ind self._source = source
self._salt = str(uuid.uuid4()) self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
self.config = {'cleanup': True, 'use_salt': True} self._class_to_cat_id = self._class_to_ind
self._salt = str(uuid.uuid4())
@property self.config = {'cleanup': True, 'use_salt': True}
def source(self):
excepted_source = self._source @property
if not os.path.exists(excepted_source): def source(self):
raise RuntimeError('Expected LMDB source from: {}, ' excepted_source = self._source
'but it does not exist.'.format(excepted_source)) if not os.path.exists(excepted_source):
return excepted_source raise RuntimeError(
'Expected source from: {}, '
############################################## 'but it does not exist.'
# # .format(excepted_source)
# UTILS # )
# # return excepted_source
##############################################
##############################################
def _get_comp_id(self): # #
return '_' + self._salt if self.config['use_salt'] else '' # UTILS #
# #
@classmethod ##############################################
def _get_prefix(cls, type='bbox'):
if type == 'bbox': def _get_comp_id(self):
return 'detections_' return '_' + self._salt if self.config['use_salt'] else ''
elif type == 'segm':
return 'segmentations_' @classmethod
elif type == 'kpt': def _get_prefix(cls, type='bbox'):
return 'keypoints_' if type == 'bbox':
return '' return 'detections_'
elif type == 'segm':
def _get_voc_results_T(self, results_folder, type='bbox'): return 'segmentations_'
# experiments/model_id/results/detections_taas_<comp_id>_aeroplane.txt elif type == 'kpt':
if type == 'bbox': return 'keypoints_'
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.txt' return ''
elif type == 'segm':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl' def _get_voc_results_T(self, results_folder, type='bbox'):
else: # experiments/model_id/results/detections_taas_<comp_id>_aeroplane.txt
raise ValueError('Type of results can be either bbox or segm.') if type == 'bbox':
if not os.path.exists(results_folder): filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.txt'
os.makedirs(results_folder) elif type == 'segm':
return os.path.join(results_folder, filename) filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl'
else:
def _get_coco_annotations_T(self, results_folder, type='bbox'): raise ValueError('Type of results can be either bbox or segm.')
# experiments/model_id/annotations/[GT]detections_taas_<comp_id>.json if not os.path.exists(results_folder):
filename = '[GT]_' + self._get_prefix(type) + self._name + '.json' os.makedirs(results_folder)
if not os.path.exists(results_folder): return os.path.join(results_folder, filename)
os.makedirs(results_folder)
return os.path.join(results_folder, filename) def _get_coco_annotations_T(self, results_folder, type='bbox'):
# experiments/model_id/annotations/[GT]detections_taas_<comp_id>.json
def _get_coco_results_T(self, results_folder, type='bbox'): filename = '[GT]_' + self._get_prefix(type) + self._name + '.json'
# experiments/model_id/results/detections_taas_<comp_id>.json if not os.path.exists(results_folder):
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json' os.makedirs(results_folder)
if not os.path.exists(results_folder): return os.path.join(results_folder, filename)
os.makedirs(results_folder)
return os.path.join(results_folder, filename) def _get_coco_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>.json
############################################## filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json'
# # if not os.path.exists(results_folder):
# VOC # os.makedirs(results_folder)
# # return os.path.join(results_folder, filename)
##############################################
##############################################
def _write_xml_bbox_results(self, all_boxes, gt_recs, output_dir): # #
from xml.dom import minidom # VOC #
import xml.etree.ElementTree as ET # #
ix = 0 ##############################################
for image_id, rec in gt_recs.items():
root = ET.Element('annotation') def _write_xml_bbox_results(self, all_boxes, gt_recs, output_dir):
ET.SubElement(root, 'filename').text = str(image_id) from xml.dom import minidom
for cls_ind, cls in enumerate(self.classes): import xml.etree.ElementTree as ET
if cls == '__background__': ix = 0
continue for image_id, rec in gt_recs.items():
detections = all_boxes[cls_ind][ix] root = ET.Element('annotation')
if len(detections) == 0: ET.SubElement(root, 'filename').text = str(image_id)
continue for cls_ind, cls in enumerate(self.classes):
for k in range(detections.shape[0]): if cls == '__background__':
if detections[k, -1] < cfg.VIS_TH: continue
continue detections = all_boxes[cls_ind][ix]
object = ET.SubElement(root, 'object') if len(detections) == 0:
ET.SubElement(object, 'name').text = cls continue
ET.SubElement(object, 'difficult').text = '0' for k in range(detections.shape[0]):
bnd_box = ET.SubElement(object, 'bndbox') if detections[k, -1] < cfg.VIS_TH:
ET.SubElement(bnd_box, 'xmin').text = str(detections[k][0]) continue
ET.SubElement(bnd_box, 'ymin').text = str(detections[k][1]) object = ET.SubElement(root, 'object')
ET.SubElement(bnd_box, 'xmax').text = str(detections[k][2]) ET.SubElement(object, 'name').text = cls
ET.SubElement(bnd_box, 'ymax').text = str(detections[k][3]) ET.SubElement(object, 'difficult').text = '0'
ix += 1 bnd_box = ET.SubElement(object, 'bndbox')
rawText = ET.tostring(root) ET.SubElement(bnd_box, 'xmin').text = str(detections[k][0])
dom = minidom.parseString(rawText) ET.SubElement(bnd_box, 'ymin').text = str(detections[k][1])
with open('{}/{}.xml'.format(output_dir, image_id), 'w') as f: ET.SubElement(bnd_box, 'xmax').text = str(detections[k][2])
dom.writexml(f, "", "\t", "\n", "utf-8") ET.SubElement(bnd_box, 'ymax').text = str(detections[k][3])
ix += 1
def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir): rawText = ET.tostring(root)
for cls_ind, cls in enumerate(self.classes): dom = minidom.parseString(rawText)
if cls == '__background__': with open('{}/{}.xml'.format(output_dir, image_id), 'w') as f:
continue dom.writexml(f, "", "\t", "\n", "utf-8")
print('Writing {} VOC format bbox results'.format(cls))
filename = self._get_voc_results_T(output_dir).format(cls) def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir):
with open(filename, 'wt') as f: for cls_ind, cls in enumerate(self.classes):
ix = 0 if cls == '__background__':
for image_id, rec in gt_recs.items(): continue
dets = all_boxes[cls_ind][ix] print('Writing {} VOC format bbox results'.format(cls))
ix += 1 filename = self._get_voc_results_T(output_dir).format(cls)
if len(dets) == 0: with open(filename, 'wt') as f:
continue ix = 0
for k in range(dets.shape[0]): for image_id, rec in gt_recs.items():
f.write( dets = all_boxes[cls_ind][ix]
'{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n' ix += 1
.format(image_id, dets[k, -1], if len(dets) == 0:
dets[k, 0] + 1, dets[k, 1] + 1, continue
dets[k, 2] + 1, dets[k, 3] + 1)) for k in range(dets.shape[0]):
f.write(
def _write_voc_segm_results(self, all_boxes, all_masks, output_dir): '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'
for cls_inds, cls in enumerate(self.classes): .format(image_id, dets[k, -1],
if cls == '__background__': dets[k, 0] + 1, dets[k, 1] + 1,
continue dets[k, 2] + 1, dets[k, 3] + 1))
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls) def _write_voc_segm_results(self, all_boxes, all_masks, output_dir):
bbox_filename = segm_filename.replace('segmentations', 'detections') for cls_inds, cls in enumerate(self.classes):
with open(bbox_filename, 'wb') as f: if cls == '__background__':
cPickle.dump(all_boxes[cls_inds], f, cPickle.HIGHEST_PROTOCOL) continue
with open(segm_filename, 'wb') as f: print('Writing {} VOC format segm results'.format(cls))
cPickle.dump(all_masks[cls_inds], f, cPickle.HIGHEST_PROTOCOL) segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
def _do_voc_bbox_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True): with open(bbox_filename, 'wb') as f:
aps = [] cPickle.dump(all_boxes[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No')) with open(segm_filename, 'wb') as f:
for i, cls in enumerate(self._classes): cPickle.dump(all_masks[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
if cls == '__background__':
continue def _do_voc_bbox_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
det_file = self._get_voc_results_T(output_dir).format(cls) aps = []
rec, prec, ap = voc_bbox_eval( print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
det_file, gt_recs, cls, for i, cls in enumerate(self._classes):
IoU=IoU, use_07_metric=use_07_metric, if cls == '__background__':
) continue
if ap > 0: det_file = self._get_voc_results_T(output_dir).format(cls)
aps += [ap] rec, prec, ap = voc_bbox_eval(
print('AP for {} = {:.4f}'.format(cls, ap)) det_file, gt_recs, cls,
print('Mean AP = {:.4f}\n'.format(np.mean(aps))) IoU=IoU, use_07_metric=use_07_metric,
)
def _do_voc_segm_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True): if ap > 0:
aps = [] aps += [ap]
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No')) print('AP for {} = {:.4f}'.format(cls, ap))
for i, cls in enumerate(self.classes): print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
if cls == '__background__':
continue def _do_voc_segm_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls) aps = []
bbox_filename = segm_filename.replace('segmentations', 'detections') print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
ap = voc_segm_eval( for i, cls in enumerate(self.classes):
bbox_filename, segm_filename, gt_recs, cls, if cls == '__background__':
IoU=IoU, use_07_metric=use_07_metric, continue
) segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
if ap > 0: bbox_filename = segm_filename.replace('segmentations', 'detections')
aps += [ap] ap = voc_segm_eval(
print('AP for {} = {:.4f}'.format(cls, ap)) bbox_filename, segm_filename, gt_recs, cls,
print('Mean AP = {:.4f}\n'.format(np.mean(aps))) IoU=IoU, use_07_metric=use_07_metric,
)
############################################## if ap > 0:
# # aps += [ap]
# COCO # print('AP for {} = {:.4f}'.format(cls, ap))
# # print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
##############################################
##############################################
@classmethod # #
def _get_coco_image_id(cls, image_name): # COCO #
image_id = image_name.split('_')[-1].split('.')[0] # #
try: ##############################################
return int(image_id)
except: @classmethod
return image_name def _get_coco_image_id(cls, image_name):
image_id = image_name.split('_')[-1].split('.')[0]
@classmethod try:
def _encode_coco_masks(cls, masks, boxes, im_h, im_w): return int(image_id)
num_pred = len(boxes) except:
assert len(masks) == num_pred return image_name
mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
M = masks[0].shape[0] @classmethod
scale = (M + 2.0) / M def _encode_coco_masks(cls, masks, boxes, im_h, im_w):
ref_boxes = box_utils.expand_boxes(boxes, scale) num_pred = len(boxes)
ref_boxes = ref_boxes.astype(np.int32) assert len(masks) == num_pred
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
for i in range(num_pred): M = masks[0].shape[0]
ref_box = ref_boxes[i, :4] scale = (M + 2.0) / M
mask = masks[i] ref_boxes = box_utils.expand_boxes(boxes, scale)
padded_mask[1:-1, 1:-1] = mask[:, :] ref_boxes = ref_boxes.astype(np.int32)
w = ref_box[2] - ref_box[0] + 1 padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
h = ref_box[3] - ref_box[1] + 1 for i in range(num_pred):
w = np.maximum(w, 1) ref_box = ref_boxes[i, :4]
h = np.maximum(h, 1) mask = masks[i]
mask = cv2.resize(padded_mask, (w, h)) padded_mask[1:-1, 1:-1] = mask[:, :]
mask = np.array(mask > cfg.TEST.BINARY_THRESH, dtype=np.uint8) w = ref_box[2] - ref_box[0] + 1
x1 = max(ref_box[0], 0) h = ref_box[3] - ref_box[1] + 1
y1 = max(ref_box[1], 0) w = np.maximum(w, 1)
x2 = min(ref_box[2] + 1, im_w) h = np.maximum(h, 1)
y2 = min(ref_box[3] + 1, im_h) mask = cv2.resize(padded_mask, (w, h))
mask_image[y1:y2, x1:x2, i] = \ mask = np.array(mask > cfg.TEST.BINARY_THRESH, dtype=np.uint8)
mask[(y1 - ref_box[1]):(y2 - ref_box[1]), x1 = max(ref_box[0], 0)
(x1 - ref_box[0]):(x2 - ref_box[0])] y1 = max(ref_box[1], 0)
return encode_masks(mask_image) x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
def _write_coco_bbox_annotations(self, gt_recs, output_dir): mask_image[y1:y2, x1:x2, i] = \
# Build images mask[(y1 - ref_box[1]):(y2 - ref_box[1]),
dataset = {'images': []} (x1 - ref_box[0]):(x2 - ref_box[0])]
for image_name, rec in gt_recs.items(): return encode_masks(mask_image)
dataset['images'].append({
'file_name': image_name + '.jpg', def _write_coco_bbox_annotations(self, gt_recs, output_dir):
'id': self._get_coco_image_id(image_name), # Build images
'height': rec['height'], 'width': rec['width'], dataset = {'images': []}
}) for image_name, rec in gt_recs.items():
# Build categories dataset['images'].append({
dataset['categories'] = [] 'file_name': image_name + '.jpg',
for cls in self._classes: 'id': self._get_coco_image_id(image_name),
if cls == '__background__': 'height': rec['height'], 'width': rec['width'],
continue })
dataset['categories'].append({ # Build categories
'name': cls, dataset['categories'] = []
'id': self._class_to_ind[cls], for cls in self._classes:
}) if cls == '__background__':
# Build annotations continue
dataset['annotations'] = [] dataset['categories'].append({
ann_id = 0 'name': cls,
for image_name, rec in gt_recs.items(): 'id': self._class_to_ind[cls],
for obj in rec['objects']: })
x, y = obj['bbox'][0], obj['bbox'][1] # Build annotations
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1 dataset['annotations'] = []
dataset['annotations'].append({ ann_id = 0
'id': str(ann_id), for image_name, rec in gt_recs.items():
'bbox': [x, y, w, h], for obj in rec['objects']:
'area': w * h, x, y = obj['bbox'][0], obj['bbox'][1]
'iscrowd': obj['difficult'], w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
'image_id': self._get_coco_image_id(image_name), dataset['annotations'].append({
'category_id': self._class_to_ind[obj['name']], 'id': str(ann_id),
}) 'bbox': [x, y, w, h],
ann_id += 1 'area': w * h,
ann_file = self._get_coco_annotations_T(output_dir, type='bbox') 'iscrowd': obj['difficult'],
with open(ann_file, 'w') as f: 'image_id': self._get_coco_image_id(image_name),
json.dump(dataset, f) 'category_id': self._class_to_ind[obj['name']],
return ann_file })
ann_id += 1
def _write_coco_segm_annotations(self, gt_recs, output_dir): ann_file = self._get_coco_annotations_T(output_dir, type='bbox')
# Build images with open(ann_file, 'w') as f:
dataset = {'images': []} json.dump(dataset, f)
for image_name, rec in gt_recs.items(): return ann_file
dataset['images'].append({
'file_name': image_name + '.jpg', def _write_coco_segm_annotations(self, gt_recs, output_dir):
'id': self._get_coco_image_id(image_name), # Build images
'height': rec['height'], 'width': rec['width'], dataset = {'images': []}
}) for image_name, rec in gt_recs.items():
# Build categories dataset['images'].append({
dataset['categories'] = [] 'file_name': image_name + '.jpg',
for cls in self._classes: 'id': self._get_coco_image_id(image_name),
if cls == '__background__': 'height': rec['height'], 'width': rec['width'],
continue })
dataset['categories'].append({ # Build categories
'name': cls, dataset['categories'] = []
'id': self._class_to_ind[cls], for cls in self._classes:
}) if cls == '__background__':
# Build annotations continue
dataset['annotations'] = [] dataset['categories'].append({
ann_id = 0 'name': cls,
for image_name, rec in gt_recs.items(): 'id': self._class_to_ind[cls],
for obj in rec['objects']: })
x, y = obj['bbox'][0], obj['bbox'][1] # Build annotations
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1 dataset['annotations'] = []
dataset['annotations'].append({ ann_id = 0
'id': str(ann_id), for image_name, rec in gt_recs.items():
'bbox': [x, y, w, h], for obj in rec['objects']:
'area': w * h, x, y = obj['bbox'][0], obj['bbox'][1]
'segmentation': { w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
'size': [rec['height'], rec['width']], dataset['annotations'].append({
'counts': obj['mask'], 'id': str(ann_id),
}, 'bbox': [x, y, w, h],
'iscrowd': obj['difficult'], 'area': w * h,
'image_id': self._get_coco_image_id(image_name), 'segmentation': {
'category_id': self._class_to_ind[obj['name']], 'size': [rec['height'], rec['width']],
}) 'counts': obj['mask'],
ann_id += 1 },
ann_file = self._get_coco_annotations_T(output_dir, type='segm') 'iscrowd': obj['difficult'],
with open(ann_file, 'w') as f: 'image_id': self._get_coco_image_id(image_name),
json.dump(dataset, f) 'category_id': self._class_to_ind[obj['name']],
return ann_file })
ann_id += 1
def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs): ann_file = self._get_coco_annotations_T(output_dir, type='segm')
ix, results = 0, [] with open(ann_file, 'w') as f:
for image_name, rec in gt_recs.items(): json.dump(dataset, f)
dets = boxes[ix] return ann_file
ix += 1
if isinstance(dets, list) and len(dets) == 0: def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs):
continue ix, results = 0, []
dets = dets.astype(np.float) for image_name, rec in gt_recs.items():
scores = dets[:, -1] dets = boxes[ix]
xs = dets[:, 0] ix += 1
ys = dets[:, 1] if isinstance(dets, list) and len(dets) == 0:
ws = dets[:, 2] - xs + 1 continue
hs = dets[:, 3] - ys + 1 dets = dets.astype(np.float)
results.extend( scores = dets[:, -1]
[{'image_id': self._get_coco_image_id(image_name), xs = dets[:, 0]
'category_id': cat_id, ys = dets[:, 1]
'bbox': [xs[k], ys[k], ws[k], hs[k]], ws = dets[:, 2] - xs + 1
'score': scores[k], hs = dets[:, 3] - ys + 1
} for k in range(dets.shape[0])] results.extend(
) [{'image_id': self._get_coco_image_id(image_name),
return results 'category_id': cat_id,
'bbox': [xs[k], ys[k], ws[k], hs[k]],
def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs): 'score': scores[k],
def filter_boxes(dets): } for k in range(dets.shape[0])]
boxes = dets[:, :4] )
ws = boxes[:, 2] - boxes[:, 0] return results
hs = boxes[:, 3] - boxes[:, 1]
keep = np.where((ws >= 1) & (hs >= 1))[0] def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs):
return keep def filter_boxes(dets):
results = [] boxes = dets[:, :4]
ix = 0 ws = boxes[:, 2] - boxes[:, 0]
for image_name, rec in gt_recs.items(): hs = boxes[:, 3] - boxes[:, 1]
dets = boxes[ix].astype(np.float) keep = np.where((ws >= 1) & (hs >= 1))[0]
msks = masks[ix] return keep
ix += 1 results = []
keep = filter_boxes(dets) ix = 0
im_h, im_w = rec['height'], rec['width'] for image_name, rec in gt_recs.items():
if len(keep) == 0: dets = boxes[ix].astype(np.float)
continue msks = masks[ix]
scores = dets[:, -1] ix += 1
mask_encode = self._encode_coco_masks( keep = filter_boxes(dets)
msks[keep], dets[keep, :4], im_h, im_w) im_h, im_w = rec['height'], rec['width']
for k in range(dets[keep].shape[0]): if len(keep) == 0:
rle = mask_encode[k] continue
if sys.version_info >= (3, 0): scores = dets[:, -1]
rle['counts'] = rle['counts'].decode() mask_encode = self._encode_coco_masks(
results.append({ msks[keep], dets[keep, :4], im_h, im_w)
'image_id': self._get_coco_image_id(image_name), for k in range(dets[keep].shape[0]):
'category_id': cat_id, rle = mask_encode[k]
'segmentation': rle, if sys.version_info >= (3, 0):
'score': scores[k], rle['counts'] = rle['counts'].decode()
}) results.append({
return results 'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir): 'segmentation': rle,
filename = self._get_coco_results_T(output_dir) 'score': scores[k],
results = [] })
for cls_ind, cls in enumerate(self.classes): return results
if cls == '__background__':
continue def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir):
print('Collecting {} results ({:d}/{:d})' filename = self._get_coco_results_T(output_dir)
.format(cls, cls_ind, self.num_classes - 1)) results = []
cat_id = self._class_to_cat_id[cls] for cls_ind, cls in enumerate(self.classes):
results.extend(self._coco_bbox_results_one_category( if cls == '__background__':
all_boxes[cls_ind], cat_id, gt_recs)) continue
print('Writing results json to {}'.format(filename)) print('Collecting {} results ({:d}/{:d})'
with open(filename, 'w') as fid: .format(cls, cls_ind, self.num_classes - 1))
json.dump(results, fid) cat_id = self._class_to_cat_id[cls]
return filename results.extend(self._coco_bbox_results_one_category(
all_boxes[cls_ind], cat_id, gt_recs))
def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir): print('Writing results json to {}'.format(filename))
filename = self._get_coco_results_T(output_dir, type='segm') with open(filename, 'w') as fid:
results = [] json.dump(results, fid)
for cls_ind, cls in enumerate(self.classes): return filename
if cls == '__background__':
continue def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir):
print('Collecting {} results ({:d}/{:d})' filename = self._get_coco_results_T(output_dir, type='segm')
.format(cls, cls_ind, self.num_classes - 1)) results = []
cat_id = self._class_to_cat_id[cls] for cls_ind, cls in enumerate(self.classes):
results.extend(self._coco_segm_results_one_category( if cls == '__background__':
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs)) continue
print('Writing results json to {}'.format(filename)) print('Collecting {} results ({:d}/{:d})'
with open(filename, 'w') as fid: .format(cls, cls_ind, self.num_classes - 1))
json.dump(results, fid) cat_id = self._class_to_cat_id[cls]
return filename results.extend(self._coco_segm_results_one_category(
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs))
def _do_coco_bbox_eval(self, coco, res_file): print('Writing results json to {}'.format(filename))
from lib.pycocotools.cocoeval import COCOeval with open(filename, 'w') as fid:
coco_dt = coco.loadRes(res_file) json.dump(results, fid)
coco_eval = COCOeval(coco, coco_dt, 'bbox') return filename
coco_eval.evaluate()
coco_eval.accumulate() def _do_coco_bbox_eval(self, coco, res_file):
self._print_coco_eval_results(coco_eval) from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
def _do_coco_segm_eval(self, coco, res_file): coco_eval = COCOeval(coco, coco_dt, 'bbox')
from lib.pycocotools.cocoeval import COCOeval coco_eval.evaluate()
coco_dt = coco.loadRes(res_file) coco_eval.accumulate()
coco_eval = COCOeval(coco, coco_dt, 'segm') self._print_coco_eval_results(coco_eval)
coco_eval.evaluate()
coco_eval.accumulate() def _do_coco_segm_eval(self, coco, res_file):
self._print_coco_eval_results(coco_eval) from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
def _print_coco_eval_results(self, coco_eval): coco_eval = COCOeval(coco, coco_dt, 'segm')
IoU_lo_thresh = 0.5 coco_eval.evaluate()
IoU_hi_thresh = 0.95 coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _get_thr_ind(coco_eval, thr):
ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) & def _print_coco_eval_results(self, coco_eval):
(coco_eval.params.iouThrs < thr + 1e-5))[0][0] IoU_lo_thresh = 0.5
iou_thr = coco_eval.params.iouThrs[ind] IoU_hi_thresh = 0.95
assert np.isclose(iou_thr, thr)
return ind def _get_thr_ind(coco_eval, thr):
ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh) (coco_eval.params.iouThrs < thr + 1e-5))[0][0]
ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh) iou_thr = coco_eval.params.iouThrs[ind]
assert np.isclose(iou_thr, thr)
# Precision has dims (iou, recall, cls, area range, max dets) return ind
# Area range index 0: all area ranges
# Max dets index 2: 100 per image ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
precision = \ ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
ap_default = np.mean(precision[precision > -1]) # Precision has dims (iou, recall, cls, area range, max dets)
print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] ' # Area range index 0: all area ranges
'~~~~'.format(IoU_lo_thresh, IoU_hi_thresh)) # Max dets index 2: 100 per image
print('{:.1f}'.format(100 * ap_default)) precision = \
for cls_ind, cls in enumerate(self.classes): coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
if cls == '__background__': ap_default = np.mean(precision[precision > -1])
continue print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
# Minus 1 because of __background__ '~~~~'.format(IoU_lo_thresh, IoU_hi_thresh))
precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2] print('{:.1f}'.format(100 * ap_default))
ap = np.mean(precision[precision > -1]) for cls_ind, cls in enumerate(self.classes):
print('{:.1f}'.format(100 * ap)) if cls == '__background__':
continue
print('~~~~ Summary metrics ~~~~') # Minus 1 because of __background__
coco_eval.summarize() precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
ap = np.mean(precision[precision > -1])
############################################## print('{:.1f}'.format(100 * ap))
# #
# EVAL-API # print('~~~~ Summary metrics ~~~~')
# # coco_eval.summarize()
##############################################
##############################################
def evaluate_detections(self, all_boxes, gt_recs, output_dir): # #
protocol = cfg.TEST.PROTOCOL # EVAL-API #
if 'voc' in protocol: # #
self._write_voc_bbox_results(all_boxes, gt_recs, output_dir) ##############################################
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~') def evaluate_detections(self, all_boxes, gt_recs, output_dir):
self._do_voc_bbox_eval( protocol = cfg.TEST.PROTOCOL
gt_recs, output_dir, IoU=0.5, if 'voc' in protocol:
use_07_metric='2007' in protocol) self._write_voc_bbox_results(all_boxes, gt_recs, output_dir)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~') if 'wo' not in protocol:
self._do_voc_bbox_eval( print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
gt_recs, output_dir, IoU=0.7, self._do_voc_bbox_eval(
use_07_metric='2007' in protocol) gt_recs, output_dir, IoU=0.5,
elif 'xml' in protocol: use_07_metric='2007' in protocol)
if cfg.EXP_DIR != '': print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
output_dir = cfg.EXP_DIR self._do_voc_bbox_eval(
self._write_xml_bbox_results(all_boxes, gt_recs, output_dir) gt_recs, output_dir, IoU=0.7,
elif 'coco' in protocol: use_07_metric='2007' in protocol)
from lib.pycocotools.coco import COCO elif 'xml' in protocol:
if os.path.exists(cfg.TEST.JSON_FILE): if cfg.EXP_DIR != '':
coco = COCO(cfg.TEST.JSON_FILE) output_dir = cfg.EXP_DIR
# We should override category id before writing results self._write_xml_bbox_results(all_boxes, gt_recs, output_dir)
cats = coco.loadCats(coco.getCatIds()) elif 'coco' in protocol:
self._class_to_cat_id = dict(zip( from lib.pycocotools.coco import COCO
[c['name'] for c in cats], coco.getCatIds())) if os.path.exists(cfg.TEST.JSON_FILE):
else: coco = COCO(cfg.TEST.JSON_FILE)
coco = None # We should override category id before writing results
res_file = self._write_coco_bbox_results( cats = coco.loadCats(coco.getCatIds())
all_boxes, gt_recs, output_dir) self._class_to_cat_id = dict(zip(
if 'wo' not in protocol: [c['name'] for c in cats], coco.getCatIds()))
if coco is None: else:
ann_file = self._write_coco_bbox_annotations(gt_recs, output_dir) coco = None
coco = COCO(ann_file) res_file = self._write_coco_bbox_results(
self._do_coco_bbox_eval(coco, res_file) all_boxes, gt_recs, output_dir)
if 'wo' not in protocol:
def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir): if coco is None:
protocol = cfg.TEST.PROTOCOL ann_file = self._write_coco_bbox_annotations(gt_recs, output_dir)
if 'voc' in protocol: coco = COCO(ann_file)
self._write_voc_segm_results(all_boxes, all_masks, output_dir) self._do_coco_bbox_eval(coco, res_file)
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~') def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
self._do_voc_segm_eval( protocol = cfg.TEST.PROTOCOL
gt_recs, output_dir, IoU=0.5, if 'voc' in protocol:
use_07_metric='2007' in protocol) self._write_voc_segm_results(all_boxes, all_masks, output_dir)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~') if 'wo' not in protocol:
self._do_voc_segm_eval( print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
gt_recs, output_dir, IoU=0.7, self._do_voc_segm_eval(
use_07_metric='2007' in protocol) gt_recs, output_dir, IoU=0.5,
elif 'coco' in protocol: use_07_metric='2007' in protocol)
from lib.pycocotools.coco import COCO print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
if os.path.exists(cfg.TEST.JSON_FILE): self._do_voc_segm_eval(
coco = COCO(cfg.TEST.JSON_FILE) gt_recs, output_dir, IoU=0.7,
# We should override category id before writing results use_07_metric='2007' in protocol)
cats = coco.loadCats(coco.getCatIds()) elif 'coco' in protocol:
self._class_to_cat_id = dict( from lib.pycocotools.coco import COCO
zip([c['name'] for c in cats], coco.getCatIds())) if os.path.exists(cfg.TEST.JSON_FILE):
else: coco = COCO(cfg.TEST.JSON_FILE)
coco = None # We should override category id before writing results
res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir) cats = coco.loadCats(coco.getCatIds())
if 'wo' not in protocol: self._class_to_cat_id = dict(
if coco is None: zip([c['name'] for c in cats], coco.getCatIds()))
coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir)) else:
self._do_coco_segm_eval(coco, res_file) coco = None
res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir)
def competition_mode(self, on): if 'wo' not in protocol:
if on: if coco is None:
self.config['use_salt'] = False coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir))
self.config['cleanup'] = False self._do_coco_segm_eval(coco, res_file)
else:
self.config['use_salt'] = True def competition_mode(self, on):
self.config['cleanup'] = True if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
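# --- Editor's note: a hedged, self-contained illustration of the box format
# conversion performed in _coco_bbox_results_one_category() above: detections
# arrive as [x1, y1, x2, y2, score] and COCO results use [x, y, width, height].
def _xyxy_to_coco_xywh(x1, y1, x2, y2):
    # +1 because the corner coordinates are treated as inclusive pixels here.
    return [x1, y1, x2 - x1 + 1, y2 - y1 + 1]

# e.g. _xyxy_to_coco_xywh(48., 240., 195., 371.) -> [48.0, 240.0, 148.0, 132.0]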
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import cv2 import cv2
import numpy as np import numpy as np
try: try:
import cPickle import cPickle
except ImportError: except ImportError:
import pickle as cPickle import pickle as cPickle
from lib.core.config import cfg from lib.core.config import cfg
from lib.pycocotools.mask_utils import mask_rle2im from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils.boxes import expand_boxes from lib.utils.boxes import expand_boxes
from lib.utils.mask_transform import mask_overlap from lib.utils.mask import mask_overlap
def voc_ap(rec, prec, use_07_metric=False): def voc_ap(rec, prec, use_07_metric=False):
""" ap = voc_ap(rec, prec, [use_07_metric]) """ ap = voc_ap(rec, prec, [use_07_metric])
Compute VOC AP given precision and recall. Compute VOC AP given precision and recall.
If use_07_metric is true, uses the If use_07_metric is true, uses the
VOC 07 11-point method (default: False). VOC 07 11-point method (default: False).
""" """
if use_07_metric: if use_07_metric:
# 11 point metric # 11 point metric
ap = 0. ap = 0.
for t in np.arange(0., 1.1, 0.1): for t in np.arange(0., 1.1, 0.1):
if np.sum(rec >= t) == 0: if np.sum(rec >= t) == 0:
p = 0 p = 0
else: else:
p = np.max(prec[rec >= t]) p = np.max(prec[rec >= t])
ap = ap + p / 11. ap = ap + p / 11.
else: else:
# correct AP calculation # correct AP calculation
# first append sentinel values at the end # first append sentinel values at the end
mrec = np.concatenate(([0.], rec, [1.])) mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.])) mpre = np.concatenate(([0.], prec, [0.]))
# compute the precision envelope # compute the precision envelope
for i in range(mpre.size - 1, 0, -1): for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points # to calculate area under PR curve, look for points
# where X axis (recall) changes value # where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0] i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec # and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap return ap
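A quick sanity check for voc_ap as defined above, using a toy precision/recall curve (the numbers are made up for illustration):

import numpy as np

rec = np.array([0.5, 0.5, 1.0, 1.0])     # cumulative recall over 4 detections
prec = np.array([1.0, 0.5, 0.667, 0.5])  # corresponding precision values

print(voc_ap(rec, prec, use_07_metric=False))  # interpolated area under PR
print(voc_ap(rec, prec, use_07_metric=True))   # VOC07 11-point average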
def voc_bbox_eval( def voc_bbox_eval(
det_file, det_file,
gt_recs, gt_recs,
cls_name, cls_name,
IoU=0.5, IoU=0.5,
use_07_metric=False, use_07_metric=False,
): ):
class_recs = {} class_recs = {}
n_pos = 0 n_pos = 0
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name] R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R]) bbox = np.array([x['bbox'] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool) difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R) det = [False] * len(R)
n_pos = n_pos + sum(~difficult) n_pos = n_pos + sum(~difficult)
class_recs[image_name] = { class_recs[image_name] = {
'bbox': bbox, 'bbox': bbox,
'difficult': difficult, 'difficult': difficult,
'det': det 'det': det
} }
# Read detections # Read detections
with open(det_file, 'r') as f: with open(det_file, 'r') as f:
lines = f.readlines() lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines] splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines] image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines]) confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# Avoid IndexError if detecting nothing # Avoid IndexError if detecting nothing
if len(BB) == 0: if len(BB) == 0:
return 0, 0, -1 return 0, 0, -1
# Sort by confidence # Sort by confidence
sorted_ind = np.argsort(-confidence) sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :] BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind] image_ids = [image_ids[x] for x in sorted_ind]
# Go down detections and mark TPs and FPs # Go down detections and mark TPs and FPs
nd = len(image_ids) nd = len(image_ids)
tp, fp = np.zeros(nd), np.zeros(nd) tp, fp = np.zeros(nd), np.zeros(nd)
for d in range(nd): for d in range(nd):
R = class_recs[image_ids[d]] R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float) bb = BB[d, :].astype(float)
ovmax, jmax = -np.inf, 0 ovmax, jmax = -np.inf, 0
BBGT = R['bbox'].astype(float) BBGT = R['bbox'].astype(float)
if BBGT.size > 0: if BBGT.size > 0:
# Compute overlaps intersection # Compute overlaps intersection
ixmin = np.maximum(BBGT[:, 0], bb[0]) ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1]) iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2]) ixmax = np.minimum(BBGT[:, 2], bb[2])
iymax = np.minimum(BBGT[:, 3], bb[3]) iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin + 1., 0.) iw = np.maximum(ixmax - ixmin + 1., 0.)
ih = np.maximum(iymax - iymin + 1., 0.) ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih inters = iw * ih
# Union # Union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
overlaps = inters / uni overlaps = inters / uni
ovmax = np.max(overlaps) ovmax = np.max(overlaps)
jmax = np.argmax(overlaps) jmax = np.argmax(overlaps)
if ovmax > IoU: if ovmax > IoU:
if not R['difficult'][jmax]: if not R['difficult'][jmax]:
if not R['det'][jmax]: if not R['det'][jmax]:
tp[d] = 1. tp[d] = 1.
R['det'][jmax] = 1 R['det'][jmax] = 1
else: else:
fp[d] = 1. fp[d] = 1.
else: else:
fp[d] = 1. fp[d] = 1.
# compute precision recall # compute precision recall
fp = np.cumsum(fp) fp = np.cumsum(fp)
tp = np.cumsum(tp) tp = np.cumsum(tp)
rec = tp / float(n_pos) rec = tp / float(n_pos)
# avoid divide by zero in case the first detection matches a difficult # avoid divide by zero in case the first detection matches a difficult
# ground truth # ground truth
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric) ap = voc_ap(rec, prec, use_07_metric)
return rec, prec, ap return rec, prec, ap
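The overlap computation inside the loop above can be isolated into a small vectorised helper; a minimal sketch (not part of the library) of one detection against an array of ground-truth boxes, using the same +1 pixel convention:

import numpy as np

def iou_one_vs_many(bb, BBGT):
    """IoU of one box [x1, y1, x2, y2] against an (N, 4) array of boxes."""
    ixmin = np.maximum(BBGT[:, 0], bb[0])
    iymin = np.maximum(BBGT[:, 1], bb[1])
    ixmax = np.minimum(BBGT[:, 2], bb[2])
    iymax = np.minimum(BBGT[:, 3], bb[3])
    iw = np.maximum(ixmax - ixmin + 1., 0.)
    ih = np.maximum(iymax - iymin + 1., 0.)
    inters = iw * ih
    uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
           (BBGT[:, 2] - BBGT[:, 0] + 1.) *
           (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
    return inters / uni

print(iou_one_vs_many(np.array([0., 0., 9., 9.]),
                      np.array([[0., 0., 9., 9.], [5., 5., 14., 14.]])))
# -> [1.0, 0.1428...]: an identical box, then a partially overlapping one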
def voc_segm_eval( def voc_segm_eval(
det_file, det_file,
seg_file, seg_file,
gt_recs, gt_recs,
cls_name, cls_name,
IoU=0.5, IoU=0.5,
use_07_metric=False, use_07_metric=False,
): ):
# 0. Constants # 0. Constants
M = cfg.MRCNN.RESOLUTION M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH binary_thresh = cfg.TEST.BINARY_THRESH
scale = (M + 2.0) / M scale = (M + 2.0) / M
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
# 1. Get bbox & mask ground truths # 1. Get bbox & mask ground truths
image_names, class_recs, n_pos = [], {}, 0 image_names, class_recs, n_pos = [], {}, 0
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name] R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R]) bbox = np.array([x['bbox'] for x in R])
mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R]) mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool) difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R) det = [False] * len(R)
n_pos = n_pos + sum(~difficult) n_pos = n_pos + sum(~difficult)
class_recs[image_name] = { class_recs[image_name] = {
'bbox': bbox, 'bbox': bbox,
'mask': mask, 'mask': mask,
'difficult': difficult, 'difficult': difficult,
'det': det 'det': det
} }
image_names.append(image_name) image_names.append(image_name)
# 2. Get predict pickle file for this class # 2. Get predict pickle file for this class
with open(det_file, 'rb') as f: with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f) boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f: with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f) masks_pkl = cPickle.load(f)
# 3. Pre-compute number of total instances to allocate memory # 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs) num_images = len(gt_recs)
box_num = 0 box_num = 0
for im_i in range(num_images): for im_i in range(num_images):
box_num += len(boxes_pkl[im_i]) box_num += len(boxes_pkl[im_i])
# avoid IndexError if detecting nothing # avoid IndexError if detecting nothing
if box_num == 0: if box_num == 0:
return 0, 0, -1 return 0, 0, -1
# 4. Re-organize all the predicted boxes # 4. Re-organize all the predicted boxes
new_boxes = np.zeros((box_num, 5)) new_boxes = np.zeros((box_num, 5))
new_masks = np.zeros((box_num, M, M)) new_masks = np.zeros((box_num, M, M))
new_images = [] new_images = []
cnt = 0 cnt = 0
for image_ind in range(num_images): for image_ind in range(num_images):
boxes = boxes_pkl[image_ind] boxes = boxes_pkl[image_ind]
masks = masks_pkl[image_ind] masks = masks_pkl[image_ind]
num_instance = len(boxes) num_instance = len(boxes)
for box_ind in range(num_instance): for box_ind in range(num_instance):
new_boxes[cnt] = boxes[box_ind] new_boxes[cnt] = boxes[box_ind]
new_masks[cnt] = masks[box_ind] new_masks[cnt] = masks[box_ind]
new_images.append(image_names[image_ind]) new_images.append(image_names[image_ind])
cnt += 1 cnt += 1
# 5. Rearrange boxes according to their scores # 5. Rearrange boxes according to their scores
seg_scores = new_boxes[:, -1] seg_scores = new_boxes[:, -1]
keep_inds = np.argsort(-seg_scores) keep_inds = np.argsort(-seg_scores)
new_boxes = new_boxes[keep_inds, :] new_boxes = new_boxes[keep_inds, :]
new_masks = new_masks[keep_inds, :, :] new_masks = new_masks[keep_inds, :, :]
num_pred = new_boxes.shape[0] num_pred = new_boxes.shape[0]
# 6. Calculate t/f positive # 6. Calculate t/f positive
fp = np.zeros((num_pred, 1)) fp = np.zeros((num_pred, 1))
tp = np.zeros((num_pred, 1)) tp = np.zeros((num_pred, 1))
ref_boxes = expand_boxes(new_boxes, scale) ref_boxes = expand_boxes(new_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32) ref_boxes = ref_boxes.astype(np.int32)
for i in range(num_pred): for i in range(num_pred):
image_name = new_images[keep_inds[i]] image_name = new_images[keep_inds[i]]
if image_name not in class_recs: if image_name not in class_recs:
print('Warning: {} does not exist in the ground-truths.'.format(image_name)) print('Warning: {} does not exist in the ground-truths.'.format(image_name))
fp[i] = 1 fp[i] = 1
continue continue
R = class_recs[image_name] R = class_recs[image_name]
im_h = gt_recs[image_name]['height'] im_h = gt_recs[image_name]['height']
im_w = gt_recs[image_name]['width'] im_w = gt_recs[image_name]['width']
# Decode mask # Decode mask
ref_box = ref_boxes[i, :4] ref_box = ref_boxes[i, :4]
mask = new_masks[i] mask = new_masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :] padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1 w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1 h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1) w = np.maximum(w, 1)
h = np.maximum(h, 1) h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h)) mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > binary_thresh, dtype=np.uint8) mask = np.array(mask > binary_thresh, dtype=np.uint8)
x1 = max(ref_box[0], 0) x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0) y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w) x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h) y2 = min(ref_box[3] + 1, im_h)
pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]), pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])] (x1 - ref_box[0]): (x2 - ref_box[0])]
# Calculate max region overlap # Calculate max region overlap
ovmax, jmax = -1, -1 ovmax, jmax = -1, -1
for j in range(len(R['det'])): for j in range(len(R['det'])):
gt_mask_bound = R['bbox'][j].astype(int) gt_mask_bound = R['bbox'][j].astype(int)
pred_mask_bound = new_boxes[i, :4].astype(int) pred_mask_bound = new_boxes[i, :4].astype(int)
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1, crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1] gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask) ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
if ov > ovmax: if ov > ovmax:
ovmax = ov ovmax = ov
jmax = j jmax = j
if ovmax > IoU: if ovmax > IoU:
if not R['difficult'][jmax]: if not R['difficult'][jmax]:
if not R['det'][jmax]: if not R['det'][jmax]:
tp[i] = 1. tp[i] = 1.
R['det'][jmax] = 1 R['det'][jmax] = 1
else: else:
fp[i] = 1. fp[i] = 1.
else: else:
fp[i] = 1 fp[i] = 1
# 7. Calculate precision # 7. Calculate precision
fp = np.cumsum(fp) fp = np.cumsum(fp)
tp = np.cumsum(tp) tp = np.cumsum(tp)
rec = tp / float(n_pos) rec = tp / float(n_pos)
# avoid divide by zero in case the first matches a difficult gt # avoid divide by zero in case the first matches a difficult gt
prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps) prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric=use_07_metric) ap = voc_ap(rec, prec, use_07_metric=use_07_metric)
return ap return ap
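The mask pasting above (pad the MxM prediction, resize it to the expanded box, threshold, clip to the image) can be summarised as a standalone sketch; the resolution and threshold below are assumed values rather than reads from cfg:

import cv2
import numpy as np

def decode_mask(mask, ref_box, im_h, im_w, binary_thresh=0.4):
    """Paste an (M, M) soft mask into image coordinates, as done above."""
    M = mask.shape[0]
    padded = np.zeros((M + 2, M + 2), dtype=np.float32)
    padded[1:-1, 1:-1] = mask
    w = max(ref_box[2] - ref_box[0] + 1, 1)
    h = max(ref_box[3] - ref_box[1] + 1, 1)
    resized = cv2.resize(padded, (w, h))
    binary = (resized > binary_thresh).astype(np.uint8)
    x1, y1 = max(ref_box[0], 0), max(ref_box[1], 0)
    x2, y2 = min(ref_box[2] + 1, im_w), min(ref_box[3] + 1, im_h)
    return binary[(y1 - ref_box[1]):(y2 - ref_box[1]),
                  (x1 - ref_box[0]):(x2 - ref_box[0])]

# e.g. a 28x28 prediction pasted into a 100x100 image at box (10, 10, 40, 40)
print(decode_mask(np.random.rand(28, 28), (10, 10, 40, 40), 100, 100).shape)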
...@@ -13,7 +13,7 @@ from __future__ import absolute_import ...@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.layers.data_layer import DataLayer from lib.faster_rcnn.data_layer import DataLayer
from lib.faster_rcnn.layers.proposal_layer import ProposalLayer from lib.faster_rcnn.proposal_layer import ProposalLayer
from lib.faster_rcnn.layers.proposal_target_layer import ProposalTargetLayer from lib.faster_rcnn.proposal_target_layer import ProposalTargetLayer
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
class AnchorTargetLayer(torch.nn.Module): class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets.""" """Assign anchors to ground-truth targets."""
def __init__(self): def __init__(self):
super(AnchorTargetLayer, self).__init__() super(AnchorTargetLayer, self).__init__()
# Load the basic configs # Load the basic configs
# C4 backbone takes the first stride # C4 backbone takes the first stride
self.scales = cfg.RPN.SCALES self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0] self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount # Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors # Generate base anchors
self.base_anchors = generate_anchors( self.base_anchors = generate_anchors(
base_size=self.stride, base_size=self.stride,
ratios=self.ratios, ratios=self.ratios,
scales=np.array(self.scales), scales=np.array(self.scales),
) )
def forward(self, features, gt_boxes, ims_info): def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets. """Produces anchor classification labels and bounding-box regression targets.
Parameters Parameters
---------- ----------
features : sequence of dragon.vm.torch.Tensor features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers. The features of specific conv layers.
gt_boxes : numpy.ndarray gt_boxes : numpy.ndarray
The packed ground-truth boxes. The packed ground-truth boxes.
ims_info : numpy.ndarray ims_info : numpy.ndarray
The information of input images. The information of input images.
""" """
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal( logger.fatal(
'Input {} images, got {} slices of gt boxes.' 'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide)) .format(num_images, len(gt_boxes_wide))
) )
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
height, width = features[0].shape[-2:] height, width = features[0].shape[-2:]
shift_x = np.arange(0, width) * self.stride shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get # cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0] A = self.base_anchors.shape[0]
K = shifts.shape[0] K = shifts.shape[0]
all_anchors = (self.base_anchors.reshape((1, A, 4)) + all_anchors = (self.base_anchors.reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4)) all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A) total_anchors = int(K * A)
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32) all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32) all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32) all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32) all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
for ix in range(num_images): for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label) # GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix] gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix] im_info = ims_info[ix]
if self._allowed_border >= 0: if self._allowed_border >= 0:
# Only keep anchors inside the image # Only keep anchors inside the image
inds_inside = np.where( inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) & (all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) & (all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & (all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] (all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :] anchors = all_anchors[inds_inside, :]
else: else:
inds_inside = np.arange(all_anchors.shape[0]) inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors anchors = all_anchors
num_inside = len(inds_inside) num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32) labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1) labels.fill(-1)
# Overlaps between the anchors and the gt boxes # Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float),
) )
argmax_overlaps = overlaps.argmax(axis=1) argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps] max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0) gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels first so that positive labels can clobber them # Assign bg labels first so that positive labels can clobber them
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# fg label: for each gt, anchor with highest overlap # fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1 labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU # fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES: if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels last so that negative labels can clobber positives # Assign bg labels last so that negative labels can clobber positives
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many # Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg: if len(fg_inds) > num_fg:
disable_inds = npr.choice( disable_inds = npr.choice(
fg_inds, fg_inds,
size=len(fg_inds) - num_fg, size=len(fg_inds) - num_fg,
replace=False, replace=False,
) )
labels[disable_inds] = -1 labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many # Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0] bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg: if len(bg_inds) > num_bg:
disable_inds = npr.choice( disable_inds = npr.choice(
bg_inds, bg_inds,
size=len(bg_inds) - num_bg, size=len(bg_inds) - num_bg,
replace=False, replace=False,
) )
labels[disable_inds] = -1 labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32) bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform( bbox_targets[fg_inds, :] = bbox_transform(
ex_rois=anchors[fg_inds, :], ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], 0:4], gt_rois=gt_boxes[argmax_overlaps[fg_inds], :4],
) )
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0)) bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
all_labels[ix, inds_inside] = labels # label all_labels[ix, inds_inside] = labels # label
all_bbox_targets[ix, inds_inside] = bbox_targets all_bbox_targets[ix, inds_inside] = bbox_targets
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
labels = all_labels \ labels = all_labels \
.reshape((num_images, height, width, A)) \ .reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \ .transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors)) .reshape((num_images, total_anchors))
bbox_targets = all_bbox_targets \ bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \ .reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \ bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \ .reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \ bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \ .reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
return { return {
'labels': blob_to_tensor(labels), 'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets), 'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights), 'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights), 'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
} }
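The (1, A, 4) + (K, 1, 4) broadcast used above to tile the base anchors over every feature-map position can be checked in isolation; a minimal sketch with made-up base anchors, a 16-pixel stride and a 2x3 feature map:

import numpy as np

base_anchors = np.array([[-8., -8., 8., 8.],
                         [-16., -8., 16., 8.],
                         [-8., -16., 8., 16.]])
stride, height, width = 16, 2, 3

shift_x, shift_y = np.meshgrid(np.arange(width) * stride,
                               np.arange(height) * stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()

A, K = base_anchors.shape[0], shifts.shape[0]
all_anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
print(all_anchors.reshape((K * A, 4)).shape)  # -> (18, 4), i.e. K * A anchors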
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import im_list_to_blob
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self.q1_in = self.q2_in = self.q_out = None
self.daemon = True
def get(self, Q_in):
processed_ims, ims_info, all_boxes = [], [], []
for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = Q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[0:2]) + [im_scale])
# Encode boxes by adding the idx of images
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), dtype=np.float32)
im_boxes[:, 0:gt_boxes.shape[1]] = gt_boxes
im_boxes[:, -1] = ix
all_boxes.append(im_boxes)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, dtype=np.float32),
'gt_boxes': np.concatenate(all_boxes, axis=0),
}
def run(self):
while True:
if self.q1_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.q_out.put(self.get(self.q1_in))
elif self.q2_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.q_out.put(self.get(self.q2_in))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import multiprocessing
import numpy
from dragon.tools import db
from lib.core.config import cfg
class DataReader(multiprocessing.Process):
"""Collect encoded str from `LMDB`_.
Partition and shuffle records over distributed nodes.
Parameters
----------
source : str
The path of database.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=2048
The number of chunks to split.
"""
def __init__(self, **kwargs):
"""Create a DataReader."""
super(DataReader, self).__init__()
self._source = kwargs.get('source', '')
self._use_shuffle = kwargs.get('shuffle', False)
self._num_chunks = kwargs.get('num_chunks', 2048)
self._part_idx, self._num_parts = 0, 1
self._cursor, self._chunk_cursor = 0, 0
self._chunk_size, self._perm_size = 0, 0
self._head, self._tail, self._num_entries = 0, 0, 0
self._db, self._zfill, self._perm = None, None, None
self._rng_seed = cfg.RNG_SEED
self.q_out = None
self.daemon = True
def element(self):
"""Get the value of current record.
Returns
-------
str
The encoded str.
"""
return self._db.value()
def redirect(self, target):
"""Redirect to the target position.
Parameters
----------
target : int
The key of the record.
Notes
-----
The redirection reopens the database.
You can drop caches by ``echo 3 > /proc/sys/vm/drop_caches``.
This helps avoid getting stuck when *Database Size* >> *RAM Size*.
"""
self._db.close()
self._db.open(self._source)
self._cursor = target
self._db.set(str(target).zfill(self._zfill))
def reset(self):
"""Reset the cursor and environment."""
if self._num_parts > 1 or self._use_shuffle:
self._chunk_cursor = 0
self._part_idx = (self._part_idx + 1) % self._num_parts
if self._use_shuffle:
self._perm = numpy.random.permutation(self._perm_size)
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._head = self._head * self._chunk_size
if self._head >= self._num_entries: self.next_chunk()
self._tail = self._head + self._chunk_size
self._tail = min(self._num_entries, self._tail)
else:
self._head, self._tail = 0, self._num_entries
self.redirect(self._head)
def next_record(self):
"""Step the cursor of records."""
self._db.next()
self._cursor += 1
def next_chunk(self):
"""Step the cursor of chunks."""
self._chunk_cursor += 1
if self._chunk_cursor >= self._perm_size:
self.reset()
else:
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._head = self._head * self._chunk_size
if self._head >= self._num_entries:
self.next_chunk()
else:
self._tail = self._head + self._chunk_size
self._tail = min(self._num_entries, self._tail)
self.redirect(self._head)
def run(self):
"""Start the process."""
# Fix seed
numpy.random.seed(self._rng_seed)
# Init db
self._db = db.LMDB()
self._db.open(self._source)
self._zfill = self._db.zfill()
self._num_entries = self._db.num_entries()
epoch_size = self._num_entries // self._num_parts + 1
if self._use_shuffle:
if self._num_chunks <= 0:
# Each chunk has at most 1 record (Record-Wise)
self._chunk_size, self._perm_size = 1, epoch_size
else:
# Search an optimal chunk size (Chunk-Wise)
min_size, max_size = \
1, self._db._total_size * 1.0 \
/ (self._num_chunks * (1 << 20))
while min_size * 2 < max_size: min_size *= 2
self._perm_size = int(math.ceil(
self._db._total_size * 1.1 /
(self._num_parts * min_size << 20)))
self._chunk_size = int(
self._num_entries * 1.0 /
(self._perm_size * self._num_parts) + 1)
limit = (self._num_parts - 0.5) * self._perm_size * self._chunk_size
if self._num_entries <= limit:
# Roll back to Record-Wise shuffle
self._chunk_size, self._perm_size = 1, epoch_size
else:
# One chunk has at most K records
self._chunk_size, self._perm_size = epoch_size, 1
self._perm = numpy.arange(self._perm_size)
# Init env
self.reset()
# Run!
while True:
self.q_out.put(self.element())
self.next_record()
if self._cursor >= self._tail:
if self._num_parts > 1 or self._use_shuffle:
self.next_chunk()
else:
self.reset()
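For context on what the reader above implements, a plain-Python sketch (not the Dragon API) of chunk-wise shuffling: the chunk order is permuted, then each chunk is read sequentially; chunk_size=1 degenerates to record-wise shuffling:

import numpy as np

def iterate_chunks(num_entries, chunk_size, seed=3):
    """Yield record indices in shuffled-chunk order (illustrative only)."""
    num_chunks = (num_entries + chunk_size - 1) // chunk_size
    np.random.seed(seed)
    for chunk in np.random.permutation(num_chunks):
        head = chunk * chunk_size
        tail = min(head + chunk_size, num_entries)
        for cursor in range(head, tail):
            yield cursor

# Larger chunks trade shuffle randomness for sequential reads on huge databases.
print(list(iterate_chunks(10, 4)))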
...@@ -13,55 +13,70 @@ from __future__ import absolute_import ...@@ -13,55 +13,70 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from multiprocessing import Queue import multiprocessing as mp
import time import time
import dragon import dragon
import pprint import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.data.data_reader import DataReader from lib.faster_rcnn.data_transformer import DataTransformer
from lib.faster_rcnn.data.data_transformer import DataTransformer from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data.blob_fetcher import BlobFetcher
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import im_list_to_blob
class DataBatch(object): class DataLayer(torch.nn.Module):
"""DataBatch aims to prefetch data by ``Triple-Buffering``. """Generate a mini-batch of data."""
It takes full advantage of the Process/Thread of Python, def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
which provides a remarkable I/O speed-up for scalable distributed training.
"""
def __init__(self, **kwargs): def __init__(self, **kwargs):
"""Construct a ``DataBatch``. """Construct a ``DataBatch``.
Parameters Parameters
---------- ----------
source : str dataset : lambda
The path of database. The creator of a dataset.
shuffle : bool, optional, default=False shuffle : bool, optional, default=False
Whether to shuffle the data. Whether to shuffle the data.
num_chunks : int, optional, default=2048 num_chunks : int, optional, default=0
The number of chunks to split. The number of chunks to split.
batch_size : int, optional, default=128 batch_size : int, optional, default=2
The size of a mini-batch. The size of a mini-batch.
prefetch : int, optional, default=5 prefetch : int, optional, default=5
The prefetch count. The prefetch count.
""" """
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
# Init mpi # Distributed settings
global_rank, local_rank, group_size = 0, 0, 1 rank, group_size = 0, 1
if dragon.mpi.is_init(): process_group = dragon.distributed.get_default_process_group()
group = dragon.mpi.is_parallel() if process_group is not None and kwargs.get(
if group is not None: # DataParallel 'phase', 'TRAIN') == 'TRAIN':
global_rank = dragon.mpi.rank() group_size = process_group.size
group_size = len(group) rank = dragon.distributed.get_rank(process_group)
for i, node in enumerate(group):
if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size kwargs['group_size'] = group_size
# Configuration # Configuration
...@@ -71,6 +86,7 @@ class DataBatch(object): ...@@ -71,6 +86,7 @@ class DataBatch(object):
self._num_transformers = kwargs.get('num_transformers', -1) self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3) self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1) self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Io-Aware Policy # Io-Aware Policy
if self._num_transformers == -1: if self._num_transformers == -1:
...@@ -81,66 +97,52 @@ class DataBatch(object): ...@@ -81,66 +97,52 @@ class DataBatch(object):
self._num_transformers = min( self._num_transformers = min(
self._num_transformers, self._max_transformers) self._num_transformers, self._max_transformers)
# Init queues # Initialize queues
self.Q1 = Queue(self._prefetch * self._num_readers * self._batch_size) num_batches = self._prefetch * self._num_readers
self.Q21 = Queue(self._prefetch * self._num_readers * self._batch_size) self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q22 = Queue(self._prefetch * self._num_readers * self._batch_size) self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q3 = Queue(self._prefetch * self._num_readers) self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Init readers # Initialize readers
self._readers = [] self._readers = []
for i in range(self._num_readers): for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers part_idx, num_parts = i, self._num_readers
num_parts *= group_size num_parts *= group_size
part_idx += local_rank * self._num_readers part_idx += rank * self._num_readers
self._readers[i]._num_parts = num_parts self._readers.append(dragon.io.DataReader(
self._readers[i]._part_idx = part_idx num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._rng_seed += part_idx self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start() self._readers[i].start()
time.sleep(0.1) time.sleep(0.1)
# Init transformers # Initialize transformers
self._transformers = [] self._transformers = []
for i in range(self._num_transformers): for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs) transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers) transformer._rng_seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1 transformer.q_in = self.Q1
transformer.q1_out = self.Q21 transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.q2_out = self.Q22
transformer.start() transformer.start()
self._transformers.append(transformer) self._transformers.append(transformer)
time.sleep(0.1) time.sleep(0.1)
# Init blob fetchers # Initialize batch-producer
self._fetchers = [] self.start()
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.q1_in = self.Q21
fetcher.q2_in = self.Q22
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Avoid echoing on multiple nodes
if local_rank == 0:
self.echo()
# Register cleanup callbacks
def cleanup(): def cleanup():
def terminate(processes): def terminate(processes):
for process in processes: for process in processes:
process.terminate() process.terminate()
process.join() process.join()
terminate(self._fetchers) terminate([self])
logger.info('Terminating BlobFetcher ......') logger.info('Terminate DataBatch.')
terminate(self._transformers) terminate(self._transformers)
logger.info('Terminating DataTransformer ......') logger.info('Terminate DataTransformer.')
terminate(self._readers) terminate(self._readers)
logger.info('Terminating DataReader......') logger.info('Terminate DataReader.')
import atexit import atexit
atexit.register(cleanup) atexit.register(cleanup)
...@@ -156,20 +158,27 @@ class DataBatch(object): ...@@ -156,20 +158,27 @@ class DataBatch(object):
""" """
return self.Q3.get() return self.Q3.get()
def echo(self): def run(self):
"""Print I/O Information. """Start the process to produce batches."""
def produce(q_in):
Returns processed_ims, ims_info, all_boxes = [], [], []
------- for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
None im, im_scale, gt_boxes = q_in.get()
processed_ims.append(im)
""" ims_info.append(list(im.shape[:2]) + [im_scale])
print('---------------------------------------------------------') im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
print('BatchFetcher({} Threads), Using config:'.format( im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
self._num_readers + self._num_transformers + self._num_fetchers)) all_boxes.append(im_boxes)
params = {'queue_size': self._prefetch, return {
'n_readers': self._num_readers, 'data': im_list_to_blob(processed_ims),
'n_transformers': self._num_transformers, 'ims_info': np.array(ims_info, dtype=np.float32),
'n_fetchers': self._num_fetchers} 'gt_boxes': np.concatenate(all_boxes, axis=0),
pprint.pprint(params) }
print('---------------------------------------------------------')
q1, q2 = self.Q21, self.Q22
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
q1, q2 = q2, q1 # Sample two queues uniformly
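A minimal usage sketch of the new data path (it assumes cfg.TRAIN.DATABASE points at a registered SeetaRecord dataset); forward() returns the blobs assembled by run() above:

data_layer = DataLayer()
blobs = data_layer.forward()
# blobs['data']     : batched image blob built by im_list_to_blob
# blobs['ims_info'] : (N, 3) array of [height, width, im_scale]
# blobs['gt_boxes'] : (M, 6) array of [x1, y1, x2, y2, cls, image_index]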
...@@ -14,22 +14,13 @@ from __future__ import division ...@@ -14,22 +14,13 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import multiprocessing import multiprocessing
import numpy as np
import numpy.random as npr
try: import cv2
import cv2 import numpy as np
except ImportError as e:
print('Failed to import cv2. Error: {0}'.format(str(e)))
try:
import PIL.Image
except ImportError as e:
print('Failed to import PIL. Error: {0}'.format(str(e)))
from lib.core.config import cfg from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.utils import logger
from lib.utils.blob import prep_im_for_blob from lib.utils.blob import prep_im_for_blob
from lib.utils.boxes import flip_boxes
class DataTransformer(multiprocessing.Process): class DataTransformer(multiprocessing.Process):
...@@ -47,44 +38,45 @@ class DataTransformer(multiprocessing.Process): ...@@ -47,44 +38,45 @@ class DataTransformer(multiprocessing.Process):
def make_roi_dict( def make_roi_dict(
self, self,
ann_datum, example,
im_scale, im_scale,
apply_flip=False, apply_flip=False,
offsets=None, offsets=None,
): ):
annotations = ann_datum.annotation
n_objects = 0 n_objects = 0
if not self._use_diff: if not self._use_diff:
for ann in annotations: for obj in example['object']:
if not ann.difficult: if obj.get('difficult', 0) == 0:
n_objects += 1 n_objects += 1
else: else:
n_objects = len(annotations) n_objects = len(example['object'])
roi_dict = { roi_dict = {
'width': ann_datum.datum.width, 'width': example['width'],
'height': ann_datum.datum.height, 'height': example['height'],
'gt_classes': np.zeros((n_objects,), 'int32'), 'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), 'float32'), 'boxes': np.zeros((n_objects, 4), 'float32'),
} }
# Filter the difficult instances # Filter the difficult instances
rec_idx = 0 object_idx = 0
for ann in annotations: for obj in example['object']:
if not self._use_diff and ann.difficult: if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue continue
roi_dict['boxes'][rec_idx, :] = [ roi_dict['boxes'][object_idx, :] = [
max(0, ann.x1), max(0, obj['xmin']),
max(0, ann.y1), max(0, obj['ymin']),
min(ann.x2, ann_datum.datum.width - 1), min(obj['xmax'], example['width'] - 1),
min(ann.y2, ann_datum.datum.height - 1), min(obj['ymax'], example['height'] - 1),
] ]
roi_dict['gt_classes'][rec_idx] = self._class_to_ind[ann.name] roi_dict['gt_classes'][object_idx] = \
rec_idx += 1 self._class_to_ind[obj['name']]
object_idx += 1
# Flip the boxes if necessary # Flip the boxes if necessary
if apply_flip: if apply_flip:
roi_dict['boxes'] = _flip_boxes( roi_dict['boxes'] = flip_boxes(
roi_dict['boxes'], roi_dict['width']) roi_dict['boxes'], roi_dict['width'])
# Scale the boxes to the detecting scale # Scale the boxes to the detecting scale
...@@ -102,50 +94,34 @@ class DataTransformer(multiprocessing.Process): ...@@ -102,50 +94,34 @@ class DataTransformer(multiprocessing.Process):
return roi_dict return roi_dict
@classmethod @classmethod
def get_image(cls, serialized): def get_image(cls, example):
datum = pb.AnnotatedDatum() img = np.frombuffer(example['content'], np.uint8)
datum.ParseFromString(serialized) return cv2.imdecode(img, -1)
datum = datum.datum
im = np.fromstring(datum.data, np.uint8)
return cv2.imdecode(im, -1) if datum.encoded is True else \
im.reshape((datum.height, datum.width, datum.channels))
@classmethod @classmethod
def get_annotations(cls, serialized): def get_annotations(cls, example):
datum = pb.AnnotatedDatum()
datum.ParseFromString(serialized)
filename = datum.filename
annotations = datum.annotation
objects = [] objects = []
for ix, ann in enumerate(annotations): for ix, obj in enumerate(example['object']):
objects.append({ objects.append({
'name': ann.name, 'name': obj['name'],
'difficult': int(ann.difficult), 'difficult': obj.get('difficult', 0),
'bbox': [ann.x1, ann.y1, ann.x2, ann.y2], 'bbox': [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']],
'mask': ann.mask,
}) })
return filename, objects return example['id'], objects
def get(self, serialized): def get(self, example):
datum = pb.AnnotatedDatum() img = np.frombuffer(example['content'], np.uint8)
datum.ParseFromString(serialized) img = cv2.imdecode(img, -1)
im_datum = datum.datum
im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True:
im = cv2.imdecode(im, -1)
else:
h, w = im_datum.height, im_datum.width
im = im.reshape((h, w, im_datum.channels))
# Scale # Scale
scale_indices = npr.randint(len(cfg.TRAIN.SCALES)) scale_indices = np.random.randint(len(cfg.TRAIN.SCALES))
target_size = cfg.TRAIN.SCALES[scale_indices] target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE) im, im_scale, jitter = prep_im_for_blob(img, target_size, cfg.TRAIN.MAX_SIZE)
# Flip # Flip
apply_flip = False apply_flip = False
if self._use_flipped: if self._use_flipped:
if npr.randint(0, 2) > 0: if np.random.randint(2) > 0:
im = im[:, ::-1, :] im = im[:, ::-1, :]
apply_flip = True apply_flip = True
...@@ -160,8 +136,8 @@ class DataTransformer(multiprocessing.Process): ...@@ -160,8 +136,8 @@ class DataTransformer(multiprocessing.Process):
# To a square (target_size, target_size) # To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im) im, offsets = _get_image_with_target_size([target_size] * 2, im)
# Datum -> RoIDict # Example -> RoIDict
roi_dict = self.make_roi_dict(datum, im_scale, apply_flip, offsets) roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets)
# Post-Process for gt boxes # Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}] # Shape like: [num_objects, {x1, y1, x2, y2, cls}]
...@@ -171,29 +147,16 @@ class DataTransformer(multiprocessing.Process): ...@@ -171,29 +147,16 @@ class DataTransformer(multiprocessing.Process):
return im, im_scale, gt_boxes return im, im_scale, gt_boxes
def run(self): def run(self):
npr.seed(self._rng_seed) np.random.seed(self._rng_seed)
while True: while True:
serialized = self.q_in.get() outputs = self.get(self.q_in.get())
data = self.get(serialized) if len(outputs[2]) < 1:
# Ensure that there is at least 1 ground-truth continue # Ignore the non-object image
if len(data[2]) < 1: aspect_ratio = float(outputs[0].shape[0]) / outputs[0].shape[1]
continue if aspect_ratio > 1.:
aspect_ratio = float(data[0].shape[0]) / data[0].shape[1] self.q1_out.put(outputs)
if aspect_ratio > 1.0:
self.q1_out.put(data)
else: else:
self.q2_out.put(data) self.q2_out.put(outputs)
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
def _get_image_with_target_size(target_size, img): def _get_image_with_target_size(target_size, img):
......
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
import numpy as np from __future__ import absolute_import
from __future__ import division
# Verify that we compute the same anchors as Shaoqing's matlab implementation: from __future__ import print_function
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat import numpy as np
# >> anchors
# # Verify that we compute the same anchors as Shaoqing's matlab implementation:
# anchors = #
# # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# -83 -39 100 56 # >> anchors
# -175 -87 192 104 #
# -359 -183 376 200 # anchors =
# -55 -55 72 72 #
# -119 -119 136 136 # -83 -39 100 56
# -247 -247 264 264 # -175 -87 192 104
# -35 -79 52 96 # -359 -183 376 200
# -79 -167 96 184 # -55 -55 72 72
# -167 -343 184 360 # -119 -119 136 136
# -247 -247 264 264
# array([[ -83., -39., 100., 56.], # -35 -79 52 96
# [-175., -87., 192., 104.], # -79 -167 96 184
# [-359., -183., 376., 200.], # -167 -343 184 360
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.], # array([[ -83., -39., 100., 56.],
# [-247., -247., 264., 264.], # [-175., -87., 192., 104.],
# [ -35., -79., 52., 96.], # [-359., -183., 376., 200.],
# [ -79., -167., 96., 184.], # [ -55., -55., 72., 72.],
# [-167., -343., 184., 360.]]) # [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
def generate_anchors( # [ -79., -167., 96., 184.],
base_size=16, # [-167., -343., 184., 360.]])
ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6),
): def generate_anchors(
""" base_size=16,
Generate anchor (reference) windows by enumerating aspect ratios X ratios=(0.5, 1, 2),
scales wrt a reference (0, 0, 15, 15) window. scales=2**np.arange(3, 6),
""" ):
base_anchor = np.array([1, 1, base_size, base_size]) - 1 """
ratio_anchors = _ratio_enum(base_anchor, ratios) Generate anchor (reference) windows by enumerating aspect ratios X
anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) scales wrt a reference (0, 0, 15, 15) window.
for i in range(ratio_anchors.shape[0])]) """
return anchors base_anchor = np.array([1, 1, base_size, base_size]) - 1
ratio_anchors = _ratio_enum(base_anchor, ratios)
anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
def generate_anchors_v2( for i in range(ratio_anchors.shape[0])])
stride=16, return anchors
ratios=(0.5, 1, 2),
sizes=(32, 64, 128, 256, 512),
): def generate_anchors_v2(
""" stride=16,
Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors ratios=(0.5, 1, 2),
are centered on stride / 2, have (approximate) sqrt areas of the specified sizes=(32, 64, 128, 256, 512),
sizes, and aspect ratios as given. ):
""" """
return generate_anchors( Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
base_size=stride, are centered on stride / 2, have (approximate) sqrt areas of the specified
ratios=ratios, sizes, and aspect ratios as given.
scales=np.array(sizes, dtype=np.float) / stride, """
) return generate_anchors(
base_size=stride,
ratios=ratios,
def _whctrs(anchor): scales=np.array(sizes, dtype=np.float) / stride,
"""Return width, height, x center, and y center for an anchor (window).""" )
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1) def _whctrs(anchor):
y_ctr = anchor[1] + 0.5 * (h - 1) """Return width, height, x center, and y center for an anchor (window)."""
return w, h, x_ctr, y_ctr w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
def _mkanchors(ws, hs, x_ctr, y_ctr): y_ctr = anchor[1] + 0.5 * (h - 1)
""" return w, h, x_ctr, y_ctr
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
""" def _mkanchors(ws, hs, x_ctr, y_ctr):
ws = ws[:, np.newaxis] """
hs = hs[:, np.newaxis] Given a vector of widths (ws) and heights (hs) around a center
anchors = np.hstack((x_ctr - 0.5 * (ws - 1), (x_ctr, y_ctr), output a set of anchors (windows).
y_ctr - 0.5 * (hs - 1), """
x_ctr + 0.5 * (ws - 1), ws = ws[:, np.newaxis]
y_ctr + 0.5 * (hs - 1))) hs = hs[:, np.newaxis]
return anchors anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
def _ratio_enum(anchor, ratios): y_ctr + 0.5 * (hs - 1)))
"""Enumerate a set of anchors for each aspect ratio wrt an anchor.""" return anchors
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios def _ratio_enum(anchor, ratios):
ws = np.round(np.sqrt(size_ratios)) """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
hs = np.round(ws * ratios) w, h, x_ctr, y_ctr = _whctrs(anchor)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr) size = w * h
return anchors size_ratios = size / ratios
ws = np.round(np.sqrt(size_ratios))
hs = np.round(ws * ratios)
def _scale_enum(anchor, scales): anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
"""Enumerate a set of anchors for each scale wrt an anchor.""" return anchors
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales def _scale_enum(anchor, scales):
anchors = _mkanchors(ws, hs, x_ctr, y_ctr) """Enumerate a set of anchors for each scale wrt an anchor."""
return anchors w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
if __name__ == '__main__': anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
print(generate_anchors()) return anchors
if __name__ == '__main__':
print(generate_anchors())
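A quick usage sketch of the two entry points above; the sizes are chosen so both calls produce the same 3 ratios x 3 scales grid shown in the header comment:

import numpy as np

print(generate_anchors(base_size=16, ratios=(0.5, 1, 2),
                       scales=2 ** np.arange(3, 6)).shape)  # -> (9, 4)

print(generate_anchors_v2(stride=16, ratios=(0.5, 1, 2),
                          sizes=(128, 256, 512)).shape)     # -> (9, 4)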
# --------------------------------------------------------
# Mask R-CNN @ Detectron
# Copyright (c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data.data_batch import DataBatch
class DataLayer(torch.nn.Module):
def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'source': database.source,
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': 0, # Record-Wise Shuffle
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
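A hedged usage sketch of the layer above; the exact keys in the returned dict beyond 'data' depend on DataBatch and the active config, so treat this as illustrative only:

# Assumes cfg.TRAIN.DATABASE and the other training options have already been
# populated from a config file before the layer is constructed.
layer = DataLayer()
blobs = layer.forward()      # dequeues one prefetched batch from DataBatch
images = blobs['data']       # zero-copied into a dragon.vm.torch tensor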
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# -------------------------------------------------------- # --------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms from lib.nms.nms_wrapper import nms
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module): class ProposalLayer(torch.nn.Module):
""" """
Compute proposals by applying estimated bounding-box Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors"). transformations to a set of regular boxes (called "anchors").
""" """
def __init__(self): def __init__(self):
super(ProposalLayer, self).__init__() super(ProposalLayer, self).__init__()
# Load the basic configs # Load the basic configs
self.scales = cfg.RPN.SCALES self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0] self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors # Generate base anchors
self.base_anchors = generate_anchors( self.base_anchors = generate_anchors(
base_size=self.stride, base_size=self.stride,
ratios=self.ratios, ratios=self.ratios,
scales=np.array(self.scales), scales=np.array(self.scales),
) )
def forward(self, features, cls_prob, bbox_pred, ims_info): def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST' cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources # Get resources
num_images = ims_info.shape[0] num_images = ims_info.shape[0]
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
height, width = cls_prob.shape[-2:] height, width = cls_prob.shape[-2:]
shift_x = np.arange(0, width) * self.stride shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get # cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0] A = self.base_anchors.shape[0]
K = shifts.shape[0] K = shifts.shape[0]
anchors = \ anchors = \
self.base_anchors.reshape((1, A, 4)) + \ self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2)) shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4)) all_anchors = anchors.reshape((K * A, 4))
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
# scores & deltas are (1, A, H, W) format # scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A) # Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1)) batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1)) batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
# Extract RoIs separately # Extract RoIs separately
for ix in range(num_images): for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1] scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4)) deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores): if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
order = np.argsort(-scores.squeeze()) order = np.argsort(-scores.squeeze())
else: else:
# Avoid sorting possibly large arrays; First partition to get top K # Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores) # unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN] inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
order = np.argsort(-scores[inds].squeeze()) order = np.argsort(-scores[inds].squeeze())
order = inds[order] order = inds[order]
deltas = deltas[order] deltas = deltas[order]
anchors = all_anchors[order] anchors = all_anchors[order]
scores = scores[order] scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations # 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas) proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image # 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2]) proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold # 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2]) # (NOTE: convert min_size to input image scale stored in im_info[2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2]) keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :] proposals = proposals[keep, :]
scores = scores[keep] scores = scores[keep]
# 6. Apply nms (e.g. threshold = 0.7) # 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_topN (e.g. 300) # 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top) # 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh) keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: if post_nms_topN > 0:
keep = keep[:post_nms_topN] keep = keep[:post_nms_topN]
proposals = proposals[keep, :] proposals = proposals[keep, :]
# Output rois blob # Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32) batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix) batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois) batch_rois.append(rpn_rois)
# Merge RoIs into a blob # Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0) rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN': if cfg_key == 'TRAIN':
return rpn_rois return rpn_rois
else: else:
return [blob_to_tensor(rpn_rois)] return [blob_to_tensor(rpn_rois)]
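The broadcast described in the comments above (A base anchors added to K grid shifts) can be checked in isolation. A toy sketch with made-up numbers:

import numpy as np

base_anchors = np.array([[-8., -8., 8., 8.],
                         [-16., -16., 16., 16.]])   # A = 2 base anchors
shifts = np.array([[0., 0., 0., 0.],
                   [16., 0., 16., 0.]])             # K = 2 grid positions
A, K = base_anchors.shape[0], shifts.shape[0]
anchors = base_anchors.reshape((1, A, 4)) + \
    shifts.reshape((1, K, 4)).transpose((1, 0, 2))  # (K, A, 4)
print(anchors.reshape((K * A, 4)))                  # 4 shifted anchors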
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# -------------------------------------------------------- # --------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
class ProposalTargetLayer(torch.nn.Module): class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.""" """Assign object detection proposals to ground-truth targets."""
def __init__(self): def __init__(self):
super(ProposalTargetLayer, self).__init__() super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES self.num_classes = cfg.MODEL.NUM_CLASSES
def forward(self, rpn_rois, gt_boxes): def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label) # GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs # Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets', keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights'] 'bbox_inside_weights', 'bbox_outside_weights']
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys])) batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately # Generate targets separately
for ix in range(num_images): for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix] gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image # Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]] rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois # Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4])))) rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training # Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes) rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32) bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([ _fmap_batch([
labels, labels,
rois, rois,
bbox_targets, bbox_targets,
bbox_inside_weights, bbox_inside_weights,
bbox_outside_weights], bbox_outside_weights],
batch_outputs, batch_outputs,
keys, keys,
) )
# Merge targets into blobs # Merge targets into blobs
for k, v in batch_outputs.items(): for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0) batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return { return {
'rois': [blob_to_tensor(batch_outputs['rois'])], 'rois': [blob_to_tensor(batch_outputs['rois'])],
'labels': blob_to_tensor(batch_outputs['labels']), 'labels': blob_to_tensor(batch_outputs['labels']),
'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']), 'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']), 'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']), 'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']),
} }
def _get_bbox_regression_labels(bbox_target_data, num_classes): def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a """Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th) compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets). by the network (i.e. only one class has non-zero targets).
Returns: Returns:
bbox_target (ndarray): N x 4K blob of regression targets bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights bbox_inside_weights (ndarray): N x 4K blob of loss weights
""" """
clss = bbox_target_data[:, 0] clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0] inds = np.where(clss > 0)[0]
for ind in inds: for ind in inds:
cls = clss[ind] cls = clss[ind]
start = 4 * cls start = 4 * cls
end = start + 4 end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:] bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0) bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights return bbox_targets, bbox_inside_weights
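A toy run of the expansion above, assuming three classes with index 0 as background: only the four columns belonging to the RoI's class receive targets and unit weights.

import numpy as np

bbox_target_data = np.array([[2., 0.1, 0.2, 0.3, 0.4],   # RoI assigned to class 2
                             [0., 0.0, 0.0, 0.0, 0.0]],  # background RoI
                            dtype=np.float32)
targets, weights = _get_bbox_regression_labels(bbox_target_data, num_classes=3)
print(targets.shape)     # (2, 12): 4 columns per class
print(targets[0, 8:12])  # [0.1 0.2 0.3 0.4], the only non-zero slot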
def _compute_targets(ex_rois, gt_rois, labels): def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image.""" """Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4 assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4 assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS) targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False) return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois( def _sample_rois(
all_rois, all_rois,
gt_boxes, gt_boxes,
fg_rois_per_image, fg_rois_per_image,
rois_per_image, rois_per_image,
num_classes, num_classes,
): ):
"""Generate a random sample of RoIs.""" """Generate a random sample of RoIs."""
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float), np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float),
) )
gt_assignment = overlaps.argmax(axis=1) gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1) max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4] labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap # Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image # Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs # foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size)) fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement # Sample foreground regions without replacement
if fg_inds.size > 0: if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding # Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired) # against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement # Sample background regions without replacement
if bg_inds.size > 0: if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg) # The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds) keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays: # Select sampled values from various arrays:
labels = labels[keep_inds] labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0 # Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0 labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds] rois = all_rois[keep_inds]
bbox_target_data = _compute_targets( bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels) rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \ bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes) _get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights return labels, rois, bbox_targets, bbox_inside_weights
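The sampling budget above is plain arithmetic on the config. A worked example assuming BATCH_SIZE = 128 and FG_FRACTION = 0.25 (the background share absorbs any unused foreground quota):

import numpy as np

rois_per_image = 128                                      # stands in for cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = int(np.round(0.25 * rois_per_image))  # stands in for cfg.TRAIN.FG_FRACTION
print(fg_rois_per_image, rois_per_image - fg_rois_per_image)  # 32 96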
def _fmap_batch(inputs, outputs, keys): def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys): for i, key in enumerate(keys):
outputs[key].append(inputs[i]) outputs[key].append(inputs[i])
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.nms.nms_wrapper import nms from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.timer import Timer from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image): def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales.""" """Detect a image, with single or multiple scales."""
# Prepare images # Prepare images
ims, ims_scale = scale_image(raw_image) ims, ims_scale = scale_image(raw_image)
# Prepare blobs # Prepare blobs
blobs = {'data': im_list_to_blob(ims)} blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([ blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale] list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32) for im_scale in ims_scale], dtype=np.float32)
    blobs['data'] = torch.from_numpy(blobs['data'])

    # Do Forward
    with torch.no_grad():
        outputs = detector.forward(inputs=blobs)

    # Decode results
    batch_rois = tensor_to_blob(outputs['rois'])
    batch_scores = tensor_to_blob(outputs['cls_prob'])
    batch_deltas = tensor_to_blob(outputs['bbox_pred'])

    batch_boxes = bbox_transform_inv(
        boxes=batch_rois[:, 1:5],
        deltas=batch_deltas,
        weights=cfg.BBOX_REG_WEIGHTS,
    )

    scores_wide, boxes_wide = [], []

    for im_idx in range(len(ims)):
        indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0]
        boxes = batch_boxes[indices]
        boxes /= ims_scale[im_idx]
        clip_tiled_boxes(boxes, raw_image.shape)
        scores_wide.append(batch_scores[indices])
        boxes_wide.append(boxes)

    return (np.vstack(scores_wide), np.vstack(boxes_wide)) \
        if len(scores_wide) > 1 else (scores_wide[0], boxes_wide[0])


def test_net(detector, server):
    # Load settings
    classes = server.classes
    num_images = server.num_images
    num_classes = server.num_classes
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    _t = {'im_detect': Timer(), 'misc': Timer()}

    for i in range(num_images):
        image_id, raw_image = server.get_image()

        _t['im_detect'].tic()
        scores, boxes = im_detect(detector, raw_image)
        _t['im_detect'].toc()

        _t['misc'].tic()
        boxes_this_image = [[]]
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_detections = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])
            ).astype(np.float32, copy=False)
            if cfg.TEST.USE_SOFT_NMS:
                keep = soft_nms(
                    cls_detections, cfg.TEST.NMS,
                    method=cfg.TEST.SOFT_NMS_METHOD,
                    sigma=cfg.TEST.SOFT_NMS_SIGMA,
                )
            else:
                keep = nms(cls_detections, cfg.TEST.NMS, force_cpu=True)
            cls_detections = cls_detections[keep, :]
            all_boxes[j][i] = cls_detections
            boxes_this_image.append(cls_detections)

        if cfg.VIS or cfg.VIS_ON_FILE:
            vis_one_image(
                raw_image, classes, boxes_this_image,
                thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
                filename=server.get_save_filename(image_id),
            )

        # Limit to max_per_image detections *over all classes*
        if cfg.TEST.DETECTIONS_PER_IM > 0:
            image_scores = []
            for j in range(1, num_classes):
                if len(all_boxes[j][i]) < 1: continue
                image_scores.append(all_boxes[j][i][:, -1])
            if len(image_scores) > 0:
                image_scores = np.hstack(image_scores)
                if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
                    image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
                    for j in range(1, num_classes):
                        keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i + 1, num_images,
                      _t['im_detect'].average_time,
                      _t['misc'].average_time),
              end='')

    print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
    print('Evaluating detections')
    server.evaluate_detections(all_boxes)
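The per-image cap in test_net keeps only the DETECTIONS_PER_IM highest-scoring boxes across all classes. A minimal sketch of that thresholding with toy scores:

import numpy as np

image_scores = np.array([0.9, 0.8, 0.75, 0.6, 0.3], dtype=np.float32)
max_per_image = 3                                     # stands in for cfg.TEST.DETECTIONS_PER_IM
image_thresh = np.sort(image_scores)[-max_per_image]  # 0.75
print(image_scores[image_scores >= image_thresh])     # the three strongest detections survive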
...@@ -13,6 +13,6 @@ from __future__ import absolute_import ...@@ -13,6 +13,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.fpn.layers.anchor_target_layer import AnchorTargetLayer from lib.fpn.anchor_target_layer import AnchorTargetLayer
from lib.fpn.layers.proposal_layer import ProposalLayer from lib.fpn.proposal_layer import ProposalLayer
from lib.fpn.layers.proposal_target_layer import ProposalTargetLayer from lib.fpn.proposal_target_layer import ProposalTargetLayer
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module): class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets.""" """Assign anchors to ground-truth targets."""
def __init__(self): def __init__(self):
super(AnchorTargetLayer, self).__init__() super(AnchorTargetLayer, self).__init__()
# Load the basic configs # Load the basic configs
self.scales = cfg.RPN.SCALES self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides): if len(self.scales) != len(self.strides):
logger.fatal( logger.fatal(
'Given {} scales and {} strides.' 'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides)) .format(len(self.scales), len(self.strides))
) )
# Allow boxes to sit over the edge by a small amount # Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors # Generate base anchors
self.base_anchors = [] self.base_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i] base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable): if not isinstance(scale, collections.Iterable):
scale = [scale] scale = [scale]
self.base_anchors.append( self.base_anchors.append(
generate_anchors( generate_anchors(
base_size=base_size, base_size=base_size,
ratios=self.ratios, ratios=self.ratios,
scales=np.array(scale), scales=np.array(scale),
) )
) )
def forward(self, features, gt_boxes, ims_info): def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.""" """Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal( logger.fatal(
'Input {} images, got {} slices of gt boxes.' 'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide)) .format(num_images, len(gt_boxes_wide))
) )
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
all_anchors, total_anchors = [], 0 all_anchors, total_anchors = [], 0
for i in range(len(self.strides)): for i in range(len(self.strides)):
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i] shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i] shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get # cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0] A = self.base_anchors[i].shape[0]
K = shifts.shape[0] K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) + anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4] # [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2)) anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4)) anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors) all_anchors.append(anchors)
total_anchors += anchors.shape[0] total_anchors += anchors.shape[0]
all_anchors = np.vstack(all_anchors) all_anchors = np.vstack(all_anchors)
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32) labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32) bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32) bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32) bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
for ix in range(num_images): for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, has_mask) # GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes = gt_boxes_wide[ix] gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix] im_info = ims_info[ix]
if self._allowed_border >= 0: if self._allowed_border >= 0:
# Only keep anchors inside the image # Only keep anchors inside the image
inds_inside = np.where( inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) & (all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) & (all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & (all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] (all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :] anchors = all_anchors[inds_inside, :]
else: else:
inds_inside = np.arange(all_anchors.shape[0]) inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors anchors = all_anchors
num_inside = len(inds_inside) num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32) labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1) labels.fill(-1)
# Overlaps between the anchors and the gt boxes # Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float),
) )
argmax_overlaps = overlaps.argmax(axis=1) argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps] max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0) gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])] np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap # fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1 labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU # fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU # bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many # Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg: if len(fg_inds) > num_fg:
disable_inds = npr.choice( disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False) fg_inds, size=(len(fg_inds) - num_fg), replace=False)
labels[disable_inds] = -1 labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many # Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0] bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg: if len(bg_inds) > num_bg:
disable_inds = npr.choice( disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False) bg_inds, size=(len(bg_inds) - num_bg), replace=False)
labels[disable_inds] = -1 labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32) bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform( bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], 0:4], gt_boxes[argmax_overlaps[fg_inds], 0:4],
) )
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0)) bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors)) labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1)) bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1)) bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1)) bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return { return {
'labels': blob_to_tensor(labels), 'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets), 'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights), 'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights), 'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
} }
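The labeling rules above reduce to thresholding on each anchor's max overlap (plus forcing every ground-truth box's best anchor to be positive). A toy sketch, assuming RPN_POSITIVE_OVERLAP = 0.7 and RPN_NEGATIVE_OVERLAP = 0.3:

import numpy as np

max_overlaps = np.array([0.75, 0.40, 0.10])  # best IoU of three anchors with any gt box
labels = np.full(3, -1.0)                    # -1: don't care
labels[max_overlaps >= 0.7] = 1              # fg: above the positive threshold
labels[max_overlaps < 0.3] = 0               # bg: below the negative threshold
print(labels)                                # [ 1. -1.  0.]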
# --------------------------------------------------------
# Mask R-CNN @ Detectron
# Copyright (c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms from lib.nms.nms_wrapper import nms
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module): class ProposalLayer(torch.nn.Module):
""" """
Compute proposals by applying estimated bounding-box Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors"). transformations to a set of regular boxes (called "anchors").
""" """
def __init__(self): def __init__(self):
super(ProposalLayer, self).__init__() super(ProposalLayer, self).__init__()
# Load the basic configs # Load the basic configs
self.scales = cfg.RPN.SCALES self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides): if len(self.scales) != len(self.strides):
logger.fatal( logger.fatal(
'Given {} scales and {} strides.' 'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides)) .format(len(self.scales), len(self.strides))
) )
# Generate base anchors # Generate base anchors
self.base_anchors = [] self.base_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i] base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable): if not isinstance(scale, collections.Iterable):
scale = [scale] scale = [scale]
self.base_anchors.append( self.base_anchors.append(
generate_anchors( generate_anchors(
base_size=base_size, base_size=base_size,
ratios=self.ratios, ratios=self.ratios,
scales=np.array(scale), scales=np.array(scale),
) )
) )
def generate_grid_anchors(self, features): def generate_grid_anchors(self, features):
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
anchors_wide = [] anchors_wide = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i] shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i] shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get # cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0] A = self.base_anchors[i].shape[0]
K = shifts.shape[0] K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) + anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4] # [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2)) anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4)) anchors = anchors.reshape((A * K, 4))
anchors_wide.append(anchors) anchors_wide.append(anchors)
return np.vstack(anchors_wide) return np.vstack(anchors_wide)
def forward(self, features, cls_prob, bbox_pred, ims_info): def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST' cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources # Get resources
num_images = ims_info.shape[0] num_images = ims_info.shape[0]
all_anchors = self.generate_grid_anchors(features) # [n, 4] all_anchors = self.generate_grid_anchors(features) # [n, 4]
if cls_prob.shape[0] != num_images or \ if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images: bbox_pred.shape[0] != num_images:
logger.fatal('Incorrect num of images: {}'.format(num_images)) logger.fatal('Incorrect num of images: {}'.format(num_images))
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
batch_scores = cls_prob.numpy(True) batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True) \ batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4] .transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately # Extract RoIs separately
for ix in range(num_images): for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1] scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4] deltas = batch_deltas[ix] # [n, 4]
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores): if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
order = np.argsort(-scores.squeeze()) order = np.argsort(-scores.squeeze())
else: else:
# Avoid sorting possibly large arrays; First partition to get top K # Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores) # unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN] inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
order = np.argsort(-scores[inds].squeeze()) order = np.argsort(-scores[inds].squeeze())
order = inds[order] order = inds[order]
deltas = deltas[order] deltas = deltas[order]
anchors = all_anchors[order] anchors = all_anchors[order]
scores = scores[order] scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations # 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas) proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image # 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2]) proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold # 3. remove predicted boxes with either height or width < threshold
keep = filter_boxes(proposals, min_size * ims_info[ix, 2]) keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :] proposals = proposals[keep, :]
scores = scores[keep] scores = scores[keep]
# 6. Apply nms (e.g. threshold = 0.7) # 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_topN (e.g. 300) # 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top) # 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh) keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: if post_nms_topN > 0:
keep = keep[:post_nms_topN] keep = keep[:post_nms_topN]
proposals = proposals[keep, :] proposals = proposals[keep, :]
# Output rois blob # Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32) batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix) batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois) batch_rois.append(rpn_rois)
# Merge RoIs into a blob # Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0) rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN': if cfg_key == 'TRAIN':
return rpn_rois return rpn_rois
else: else:
# Distribute rois into K levels # Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1 K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level) fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level)
all_rois = [] all_rois = []
for i in range(K): for i in range(K):
lv_indices = np.where(fpn_levels == (i + min_level))[0] lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0: if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling # Fake a tiny roi to avoid empty roi pooling
all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32))) all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else: else:
all_rois.append(blob_to_tensor(rpn_rois[lv_indices])) all_rois.append(blob_to_tensor(rpn_rois[lv_indices]))
return all_rois return all_rois
def _map_rois_to_fpn_levels(rois, k_min, k_max): def _map_rois_to_fpn_levels(rois, k_min, k_max):
""" """
Determine which FPN level each RoI in a set of RoIs Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper. should map to based on the heuristic in the FPN paper.
""" """
if len(rois) == 0: if len(rois) == 0:
return [] return []
ws = rois[:, 3] - rois[:, 1] + 1 ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1 hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs) s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224 s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4 lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max) return np.clip(target_levels, k_min, k_max)
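A worked example of the heuristic above, assuming the defaults quoted in the comments (s0 = 224, lvl0 = 4) and a level range of 2..5: halving or doubling the RoI side moves it one pyramid level down or up.

import numpy as np

for side in (112, 224, 448):                       # square RoIs for simplicity
    lvl = np.floor(4 + np.log2(side / 224. + 1e-6))
    print(side, int(np.clip(lvl, 2, 5)))           # 112 -> 3, 224 -> 4, 448 -> 5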
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
class ProposalTargetLayer(torch.nn.Module): class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets. """Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets. Produces proposal classification labels and bounding-box regression targets.
""" """
def __init__(self): def __init__(self):
super(ProposalTargetLayer, self).__init__() super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES self.num_classes = cfg.MODEL.NUM_CLASSES
self.fake_outputs = { self.fake_outputs = {
'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32), 'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32),
'labels': np.array([-1], dtype=np.float32), 'labels': np.array([-1], dtype=np.float32),
'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32), 'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32), 'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32), 'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
} }
def forward(self, rpn_rois, gt_boxes): def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label) # GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs # Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets', keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights'] 'bbox_inside_weights', 'bbox_outside_weights']
outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys])) outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys])) batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately # Generate targets separately
for ix in range(num_images): for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix] gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image # Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]] rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois # Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4])))) rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training # Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = \ labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes) _sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32) bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([ _fmap_batch([
labels, labels,
rois, rois,
bbox_targets, bbox_targets,
bbox_inside_weights, bbox_inside_weights,
bbox_outside_weights], bbox_outside_weights],
batch_outputs, batch_outputs,
keys, keys,
) )
# Merge targets into blobs # Merge targets into blobs
for k, v in batch_outputs.items(): for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0) batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
# Distribute rois into K levels # Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1 K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level) fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)] lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
_fmap_rois( _fmap_rois(
inputs=[batch_outputs[key] for key in keys], inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs, fake_outputs=self.fake_outputs,
outputs=outputs, outputs=outputs,
keys=keys, keys=keys,
levels=lvs_indices, levels=lvs_indices,
) )
return { return {
'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)], 'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)), 'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])), 'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])), 'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])), 'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
} }
def _get_bbox_regression_labels(bbox_target_data, num_classes): def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a """Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th) compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets). by the network (i.e. only one class has non-zero targets).
Returns: Returns:
bbox_target (ndarray): N x 4K blob of regression targets bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights bbox_inside_weights (ndarray): N x 4K blob of loss weights
""" """
clss = bbox_target_data[:, 0] clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0] inds = np.where(clss > 0)[0]
for ind in inds: for ind in inds:
cls = clss[ind] cls = clss[ind]
start = 4 * cls start = 4 * cls
end = start + 4 end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:] bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0) bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights return bbox_targets, bbox_inside_weights
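To make the "4-of-4*K" layout concrete, here is a condensed, self-contained restatement of the expansion with a tiny worked example; the class indices and target values below are made up for illustration.
import numpy as np

def expand_bbox_targets(bbox_target_data, num_classes):
    # bbox_target_data: (N, 5) rows of (class, tx, ty, tw, th)
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros_like(bbox_targets)
    for ind in np.where(clss > 0)[0]:
        start = int(4 * clss[ind])
        bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:start + 4] = 1.
    return bbox_targets, bbox_inside_weights

# Two RoIs, 3 classes: the class-2 RoI gets its targets in columns 8..11,
# the background RoI (class 0) keeps all-zero targets and weights.
data = np.array([[2., .1, .2, .3, .4],
                 [0., 0., 0., 0., 0.]], dtype=np.float32)
targets, weights = expand_bbox_targets(data, num_classes=3)
print(targets[0, 8:12])   # -> [0.1 0.2 0.3 0.4]
print(weights[1].sum())   # -> 0.0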
def _compute_targets(ex_rois, gt_rois, labels): def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image.""" """Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4 assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4 assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS) targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False) return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _map_rois_to_fpn_levels(rois, k_min, k_max): def _map_rois_to_fpn_levels(rois, k_min, k_max):
""" """
Determine which FPN level each RoI in a set of RoIs Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper. should map to based on the heuristic in the FPN paper.
""" """
if len(rois) == 0: if len(rois) == 0:
return [] return []
ws = rois[:, 3] - rois[:, 1] + 1 ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1 hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs) s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224 s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4 lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max) return np.clip(target_levels, k_min, k_max)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Sample a batch of RoIs comprising foreground and background examples.""" """Sample a batch of RoIs comprising foreground and background examples."""
# overlaps: (rois x gt_boxes) # overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
gt_assignment = overlaps.argmax(axis=1) gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1) max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4] labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap # Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image # Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs # foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size)) fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement # Sample foreground regions without replacement
if fg_inds.size > 0: if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding # Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired) # against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement # Sample background regions without replacement
if bg_inds.size > 0: if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg) # The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds) keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays: # Select sampled values from various arrays:
labels = labels[keep_inds] labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0 # Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0 labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds] rois = all_rois[keep_inds]
bbox_target_data = _compute_targets( bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels) rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \ bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes) _get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights return labels, rois, bbox_targets, bbox_inside_weights
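The quota arithmetic behind the sampling is easy to lose in the code. A short sketch, using the batch size from the configs shipped with this repository (512) and an assumed FG_FRACTION of 0.25:
import numpy as np

# Quota sketch: FG_FRACTION = 0.25 is an assumption; BATCH_SIZE = 512 matches
# the Faster R-CNN configs in this repository.
rois_per_image = 512
fg_rois_per_image = int(np.round(0.25 * rois_per_image))    # 128 foreground slots

num_fg_found, num_bg_found = 40, 2000                        # hypothetical candidate counts
fg_taken = min(fg_rois_per_image, num_fg_found)              # 40: never more than available
bg_taken = min(rois_per_image - fg_taken, num_bg_found)      # 472: background fills the rest
print(fg_taken, bg_taken)                                    # -> 40 472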
def _fmap_batch(inputs, outputs, keys): def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys): for i, key in enumerate(keys):
outputs[key].append(inputs[i]) outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels): def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
def impl(a, b, indices): def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b return a[indices] if len(indices) > 0 else b
for k in range(len(levels)): for k in range(len(levels)):
inds = levels[k] inds = levels[k]
for i, key in enumerate(keys): for i, key in enumerate(keys):
outputs[key].append(impl(inputs[i], fake_outputs[key], inds)) outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
# Import custom modules # Import custom modules
from lib.modeling.base import affine from lib.modeling.base import affine
from lib.modeling.base import bn from lib.modeling.base import bn
from lib.modeling.base import conv1x1 from lib.modeling.base import conv1x1
from lib.modeling.base import conv3x3 from lib.modeling.base import conv3x3
from lib.modeling.fast_rcnn import FastRCNN from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN from lib.modeling.fpn import FPN
from lib.modeling.retinanet import RetinaNet from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD from lib.modeling.ssd import SSD
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.modeling import affine from lib.modeling import affine
from lib.modeling import conv1x1 from lib.modeling import conv1x1
from lib.modeling import conv3x3 from lib.modeling import conv3x3
class WideResBlock(torch.nn.Module): class WideResBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out, stride=1, downsample=None): def __init__(self, dim_in, dim_out, stride=1, downsample=None):
super(WideResBlock, self).__init__() super(WideResBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride) self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out) self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out) self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out) self.bn2 = affine(dim_out)
self.downsample = downsample self.downsample = downsample
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
def forward(self, x): def forward(self, x):
residual = x residual = x
out = self.conv1(x) out = self.conv1(x)
out = self.bn1(out) out = self.bn1(out)
out = self.relu(out) out = self.relu(out)
out = self.conv2(out) out = self.conv2(out)
out = self.bn2(out) out = self.bn2(out)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
out += residual out += residual
out = self.relu(out) out = self.relu(out)
return out return out
class InceptionBlock(torch.nn.Module): class InceptionBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out): def __init__(self, dim_in, dim_out):
super(InceptionBlock, self).__init__() super(InceptionBlock, self).__init__()
self.conv1 = conv1x1(dim_in, dim_out) self.conv1 = conv1x1(dim_in, dim_out)
self.bn1 = affine(dim_out) self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out // 2) self.conv2 = conv3x3(dim_out, dim_out // 2)
self.bn2 = affine(dim_out // 2) self.bn2 = affine(dim_out // 2)
self.conv3a = conv3x3(dim_out // 2, dim_out) self.conv3a = conv3x3(dim_out // 2, dim_out)
self.bn3a = affine(dim_out) self.bn3a = affine(dim_out)
self.conv3b = conv3x3(dim_out, dim_out) self.conv3b = conv3x3(dim_out, dim_out)
self.bn3b = affine(dim_out) self.bn3b = affine(dim_out)
self.conv4 = conv3x3(dim_out * 3, dim_out) self.conv4 = conv3x3(dim_out * 3, dim_out)
self.bn4 = affine(dim_out) self.bn4 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
def forward(self, x): def forward(self, x):
residual = x residual = x
out = self.conv1(x) out = self.conv1(x)
out_1x1 = self.bn1(out) out_1x1 = self.bn1(out)
out_1x1 = self.relu(out_1x1) out_1x1 = self.relu(out_1x1)
out = self.conv2(out_1x1) out = self.conv2(out_1x1)
out = self.bn2(out) out = self.bn2(out)
out = self.relu(out) out = self.relu(out)
out = self.conv3a(out) out = self.conv3a(out)
out_3x3_a = self.bn3a(out) out_3x3_a = self.bn3a(out)
out_3x3_a = self.relu(out_3x3_a) out_3x3_a = self.relu(out_3x3_a)
out = self.conv3b(out_1x1) out = self.conv3b(out_1x1)
out_3x3_b = self.bn3b(out) out_3x3_b = self.bn3b(out)
out_3x3_b = self.relu(out_3x3_b) out_3x3_b = self.relu(out_3x3_b)
out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], dim=1) out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], dim=1)
out = self.conv4(out) out = self.conv4(out)
out = self.bn4(out) out = self.bn4(out)
out += residual out += residual
out = self.relu(out) out = self.relu(out)
return out return out
class AirNet(torch.nn.Module): class AirNet(torch.nn.Module):
def __init__(self, blocks, num_stages): def __init__(self, blocks, num_stages):
super(AirNet, self).__init__() super(AirNet, self).__init__()
self.dim_in, filters = 64, [64, 128, 256, 384] self.dim_in, filters = 64, [64, 128, 256, 384]
self.feature_dims = [None, None] + \ self.feature_dims = [None, None] + \
filters[1:num_stages - 1] filters[1:num_stages - 1]
self.conv1 = torch.nn.Conv2d( self.conv1 = torch.nn.Conv2d(
3, 64, 3, 64,
kernel_size=7, kernel_size=7,
stride=2, stride=2,
padding=3, padding=3,
bias=False, bias=False,
) )
self.bn1 = affine(self.dim_in) self.bn1 = affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = torch.nn.MaxPool2d(
kernel_size=2, kernel_size=2,
stride=2, stride=2,
padding=0, padding=0,
ceil_mode=True, ceil_mode=True,
) )
self.layer1 = self.make_blocks(filters[0], blocks[0]) self.layer1 = self.make_blocks(filters[0], blocks[0])
self.layer2 = self.make_blocks(filters[1], blocks[1], 2) self.layer2 = self.make_blocks(filters[1], blocks[1], 2)
if num_stages >= 4: if num_stages >= 4:
self.layer3 = self.make_blocks(filters[2], blocks[2], 2) self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5: if num_stages >= 5:
self.layer4 = self.make_blocks(filters[3], blocks[3], 2) self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# The Kaiming Initialization # The Kaiming Initialization
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_( torch.nn.init.kaiming_uniform_(
m.weight, m.weight,
# Fix the gain for [-127, 127] # Fix the gain for [-127, 127]
a=1, a=1,
) # Xavier Initialization ) # Xavier Initialization
def make_blocks(self, dim_out, blocks, stride=1): def make_blocks(self, dim_out, blocks, stride=1):
downsample = torch.nn.Sequential( downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride), conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out), affine(dim_out),
) )
layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)] layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
self.dim_in = dim_out self.dim_in = dim_out
for i in range(1, len(blocks)): for i in range(1, len(blocks)):
if blocks[i] == 'r': if blocks[i] == 'r':
layers.append(WideResBlock(dim_out, dim_out)) layers.append(WideResBlock(dim_out, dim_out))
elif blocks[i] == 'i': elif blocks[i] == 'i':
layers.append(InceptionBlock(dim_out, dim_out)) layers.append(InceptionBlock(dim_out, dim_out))
else: else:
raise ValueError('Unknown block flag: ' + blocks[i]) raise ValueError('Unknown block flag: ' + blocks[i])
return torch.nn.Sequential(*layers) return torch.nn.Sequential(*layers)
def forward(self, x): def forward(self, x):
x = self.conv1(x) x = self.conv1(x)
x = self.bn1(x) x = self.bn1(x)
x = self.relu(x) x = self.relu(x)
x = self.maxpool(x) x = self.maxpool(x)
x = self.layer1(x) x = self.layer1(x)
outputs = [None, None, self.layer2(x)] outputs = [None, None, self.layer2(x)]
if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])] if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])] if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])]
return outputs return outputs
def airnet(num_stages): def airnet(num_stages):
blocks = ( blocks = (
('r', 'r'), # conv2 ('r', 'r'), # conv2
('r', 'i'), # conv3 ('r', 'i'), # conv3
('r', 'i'), # conv4 ('r', 'i'), # conv4
('r', 'i'), # conv5 ('r', 'i'), # conv5
) )
return AirNet(blocks, num_stages) return AirNet(blocks, num_stages)
def make_airnet_(): return airnet(5) def make_airnet_(): return airnet(5)
def make_airnet_3b(): return airnet(3) def make_airnet_3b(): return airnet(3)
def make_airnet_4b(): return airnet(4) def make_airnet_4b(): return airnet(4)
def make_airnet_5b(): return airnet(5) def make_airnet_5b(): return airnet(5)
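How the stage count translates into exported features follows from AirNet.__init__ above: only conv3 and upward are exposed, earlier slots stay None. A quick check using the filter list hard-coded in the constructor:
filters = [64, 128, 256, 384]
for num_stages in (3, 4, 5):
    feature_dims = [None, None] + filters[1:num_stages - 1]
    print(num_stages, feature_dims)
# 3 [None, None, 128]
# 4 [None, None, 128, 256]
# 5 [None, None, 128, 256, 384]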
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
"""Define some basic structures.""" """Define some basic structures."""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
def affine(dim_in, inplace=True): def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed.""" """AffineBN, weight and bias are fixed."""
return torch.nn.Affine( return torch.nn.Affine(
dim_in, dim_in,
fix_weight=True, fix_weight=True,
fix_bias=True, fix_bias=True,
inplace=inplace, inplace=inplace,
) )
def bn(dim_in, eps=1e-5): def bn(dim_in, eps=1e-5):
"""The BatchNorm.""" """The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps) return torch.nn.BatchNorm2d(dim_in, eps=eps)
def conv1x1(dim_in, dim_out, stride=1, bias=False): def conv1x1(dim_in, dim_out, stride=1, bias=False):
"""1x1 convolution.""" """1x1 convolution."""
return torch.nn.Conv2d( return torch.nn.Conv2d(
dim_in, dim_in,
dim_out, dim_out,
kernel_size=1, kernel_size=1,
stride=stride, stride=stride,
bias=bias, bias=bias,
) )
def conv3x3(dim_in, dim_out, stride=1, bias=False): def conv3x3(dim_in, dim_out, stride=1, bias=False):
"""3x3 convolution with padding.""" """3x3 convolution with padding."""
return torch.nn.Conv2d( return torch.nn.Conv2d(
dim_in, dim_in,
dim_out, dim_out,
kernel_size=3, kernel_size=3,
stride=stride, stride=stride,
padding=1, padding=1,
bias=bias, bias=bias,
) )
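A hedged usage sketch of the helpers above: the frozen affine layer is the usual stand-in for batch normalization when fine-tuning from ImageNet weights, so a typical fragment pairs a convolution with it. The dimensions here are arbitrary.
import dragon.vm.torch as torch

# Hypothetical fragment built from the helpers defined above.
stem = torch.nn.Sequential(
    conv3x3(3, 64, stride=2),   # 3x3 conv, stride 2, no bias
    affine(64),                 # frozen scale/shift in place of BatchNorm
    torch.nn.ReLU(inplace=True),
)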
...@@ -35,11 +35,13 @@ class Detector(torch.nn.Module): ...@@ -35,11 +35,13 @@ class Detector(torch.nn.Module):
``lib.core.config`` for their hyper-parameters. ``lib.core.config`` for their hyper-parameters.
""" """
def __init__(self): def __init__(self):
super(Detector, self).__init__() super(Detector, self).__init__()
model = cfg.MODEL.TYPE model = cfg.MODEL.TYPE
backbone = cfg.MODEL.BACKBONE.lower().split('.') backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:] body, modules = backbone[0], backbone[1:]
self.recorder = None
# + Data Loader # + Data Loader
self.data_layer = importlib.import_module( self.data_layer = importlib.import_module(
...@@ -92,9 +94,14 @@ class Detector(torch.nn.Module): ...@@ -92,9 +94,14 @@ class Detector(torch.nn.Module):
Parameters Parameters
---------- ----------
inputs : dict or None inputs : dict, optional
The inputs. The inputs.
Returns
-------
dict
The outputs.
""" """
# 0. Get the inputs # 0. Get the inputs
if inputs is None: if inputs is None:
...@@ -161,7 +168,6 @@ class Detector(torch.nn.Module): ...@@ -161,7 +168,6 @@ class Detector(torch.nn.Module):
"""Optimize the graph for the inference. """Optimize the graph for the inference.
It usually involves the removing of BN or Affine. It usually involves the removing of BN or Affine.
""" """
################################## ##################################
# Merge Affine into Convolution # # Merge Affine into Convolution #
......
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import importlib import importlib
_STORE = collections.defaultdict(dict) _STORE = collections.defaultdict(dict)
########################################### ###########################################
# # # #
# Body # # Body #
# # # #
########################################### ###########################################
# ResNet # ResNet
for D in [18, 34, 50, 101, 152, 200, 269]: for D in [18, 34, 50, 101, 152, 200, 269]:
_STORE['BODY']['resnet{}'.format(D)] = \ _STORE['BODY']['resnet{}'.format(D)] = \
'lib.modeling.resnet.make_resnet_{}'.format(D) 'lib.modeling.resnet.make_resnet_{}'.format(D)
# VGG # VGG
for D in [16, 19]: for D in [16, 19]:
for T in ['', '_reduced_300', '_reduced_512']: for T in ['', '_reduced_300', '_reduced_512']:
_STORE['BODY']['vgg{}{}'.format(D, T)] = \ _STORE['BODY']['vgg{}{}'.format(D, T)] = \
'lib.modeling.vgg.make_vgg_{}{}'.format(D, T) 'lib.modeling.vgg.make_vgg_{}{}'.format(D, T)
# AirNet # AirNet
for D in ['', '3b', '4b', '5b']: for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \ _STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D) 'lib.modeling.airnet.make_airnet_{}'.format(D)
def get_template_func(name, sets, desc): def get_template_func(name, sets, desc):
name = name.lower() name = name.lower()
if name not in sets: if name not in sets:
raise ValueError( raise ValueError(
'The {} for {} was not registered.\n' 'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format( 'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys()))) name, desc, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1]) module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1] func_name = sets[name].split('.')[-1]
try: try:
module = importlib.import_module(module_name) module = importlib.import_module(module_name)
return getattr(module, func_name) return getattr(module, func_name)
except ImportError as e: except ImportError as e:
raise ValueError('Cannot import module from: ' + module_name) raise ValueError('Cannot import module from: ' + module_name)
def get_body_func(name): def get_body_func(name):
return get_template_func( return get_template_func(
name, _STORE['BODY'], 'Body') name, _STORE['BODY'], 'Body')
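Hypothetical usage of the registry: resolve a body constructor by its registered name, then call it. 'resnet50' is one of the keys registered in the loop above, and the returned callable is lib.modeling.resnet.make_resnet_50.
# Resolve and build a backbone by name (assumes the repository is importable
# on PYTHONPATH).
body_fn = get_body_func('resnet50')   # -> lib.modeling.resnet.make_resnet_50
body = body_fn()                      # ResNet-50 trunk; body.feature_dims == [64, 256, 512, 1024, 2048]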
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.ops.modules import RPNDecoder from lib.ops.modules import RPNDecoder
class FastRCNN(torch.nn.Module): class FastRCNN(torch.nn.Module):
"""Generate proposal regions for R-CNN series. """Generate proposal regions for R-CNN series.
The pipeline is as follows: The pipeline is as follows:
... -> RoIs \ /-> cls_score -> cls_loss ... -> RoIs \ /-> cls_score -> cls_loss
-> RoIFeatureXform -> MLP -> RoIFeatureXform -> MLP
... -> Features / \-> bbox_pred -> bbox_loss ... -> Features / \-> bbox_pred -> bbox_loss
""" """
def __init__(self, dim_in=256): def __init__(self, dim_in=256):
super(FastRCNN, self).__init__() super(FastRCNN, self).__init__()
if len(cfg.RPN.STRIDES) > 1: if len(cfg.RPN.STRIDES) > 1:
# RPN with multiple strides(i.e. FPN) # RPN with multiple strides(i.e. FPN)
from lib.fpn import ProposalLayer, ProposalTargetLayer from lib.fpn import ProposalLayer, ProposalTargetLayer
else: else:
# RPN with single stride(i.e. C4) # RPN with single stride(i.e. C4)
from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2) self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM) self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM) self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES) self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4) self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.rpn_decoder = RPNDecoder() self.rpn_decoder = RPNDecoder()
self.proposal_layer = ProposalLayer() self.proposal_layer = ProposalLayer()
self.proposal_target_layer = ProposalTargetLayer() self.proposal_target_layer = ProposalTargetLayer()
self.softmax = torch.nn.Softmax(dim=1) self.softmax = torch.nn.Softmax(dim=1)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False) self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = { self.roi_func = {
'RoIPool': torch.vision.ops.roi_pool, 'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.vision.ops.roi_align, 'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD] }[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1) self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1., reduction='batch_size') self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size')
# Compute spatial scales for multiple strides # Compute spatial scales for multiple strides
roi_levels = [level for level in range( roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)] cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels] self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels]
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# Careful initialization for Fast R-CNN # Careful initialization for Fast R-CNN
torch.nn.init.normal_(self.cls_score.weight, std=0.01) torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001) torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters(): for name, p in self.named_parameters():
if 'bias' in name: if 'bias' in name:
torch.nn.init.constant_(p, 0) torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale): def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func( return self.roi_func(
feature, rois, feature, rois,
output_size=( output_size=(
cfg.FRCNN.ROI_XFORM_RESOLUTION, cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION, cfg.FRCNN.ROI_XFORM_RESOLUTION,
), ),
spatial_scale=spatial_scale, spatial_scale=spatial_scale,
) )
def forward(self, **kwargs): def forward(self, **kwargs):
# Generate Proposals # Generate Proposals
# Apply the CXX implementation during inference # Apply the CXX implementation during inference
proposal_func = self.proposal_layer \ proposal_func = self.proposal_layer \
if self.training else self.rpn_decoder if self.training else self.rpn_decoder
self.rcnn_data = { self.rcnn_data = {
'rois': proposal_func( 'rois': proposal_func(
kwargs['features'], kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data), self.sigmoid(kwargs['rpn_cls_score'].data),
kwargs['rpn_bbox_pred'], kwargs['rpn_bbox_pred'],
kwargs['ims_info'], kwargs['ims_info'],
) )
} }
# Generate Targets from Proposals # Generate Targets from Proposals
if self.training: if self.training:
self.rcnn_data.update( self.rcnn_data.update(
self.proposal_target_layer( self.proposal_target_layer(
rpn_rois=self.rcnn_data['rois'], rpn_rois=self.rcnn_data['rois'],
gt_boxes=kwargs['gt_boxes'], gt_boxes=kwargs['gt_boxes'],
) )
) )
# Transform RoI Feature # Transform RoI Feature
roi_features = [] roi_features = []
if len(self.rcnn_data['rois']) > 1: if len(self.rcnn_data['rois']) > 1:
for i, spatial_scale in enumerate(self.spatial_scales): for i, spatial_scale in enumerate(self.spatial_scales):
roi_features.append( roi_features.append(
self.RoIFeatureTransform( self.RoIFeatureTransform(
kwargs['features'][i], kwargs['features'][i],
self.rcnn_data['rois'][i], self.rcnn_data['rois'][i],
spatial_scale, spatial_scale,
) )
) )
roi_features = torch.cat(roi_features, dim=0) roi_features = torch.cat(roi_features, dim=0)
else: else:
spatial_scale = 1.0 / cfg.RPN.STRIDES[0] spatial_scale = 1.0 / cfg.RPN.STRIDES[0]
roi_features = \ roi_features = \
self.RoIFeatureTransform( self.RoIFeatureTransform(
kwargs['features'][0], kwargs['features'][0],
self.rcnn_data['rois'][0], self.rcnn_data['rois'][0],
spatial_scale, spatial_scale,
) )
# Apply a simple MLP # Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim) roi_features = roi_features.view(-1, self.roi_head_dim)
rcnn_output = self.relu(self.fc6(roi_features)) rcnn_output = self.relu(self.fc6(roi_features))
rcnn_output = self.relu(self.fc7(rcnn_output)) rcnn_output = self.relu(self.fc7(rcnn_output))
# Compute rcnn logits # Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float() cls_score = self.cls_score(rcnn_output).float()
outputs = collections.OrderedDict({ outputs = collections.OrderedDict([
'bbox_pred': ('bbox_pred', self.bbox_pred(rcnn_output).float()),
self.bbox_pred(rcnn_output).float(), ])
})
if self.training:
if self.training: # Compute rcnn losses
# Compute rcnn losses outputs.update(collections.OrderedDict([
outputs.update(collections.OrderedDict({ ('cls_loss', self.cls_loss(
'cls_loss': self.cls_loss( cls_score, self.rcnn_data['labels'])),
cls_score, ('bbox_loss', self.bbox_loss(
self.rcnn_data['labels'], outputs['bbox_pred'],
), self.rcnn_data['bbox_targets'],
'bbox_loss': self.bbox_loss( self.rcnn_data['bbox_inside_weights'],
outputs['bbox_pred'], self.rcnn_data['bbox_outside_weights'],
self.rcnn_data['bbox_targets'], )),
self.rcnn_data['bbox_inside_weights'], ]))
self.rcnn_data['bbox_outside_weights'], else:
), # Return the rois to decode the refine boxes
})) if len(self.rcnn_data['rois']) > 1:
else: outputs['rois'] = torch.cat(
# Return the rois to decode the refine boxes self.rcnn_data['rois'], dim=0)
if len(self.rcnn_data['rois']) > 1: else:
outputs['rois'] = torch.cat( outputs['rois'] = self.rcnn_data['rois'][0]
self.rcnn_data['rois'], dim=0) # Return the classification prob
else: outputs['cls_prob'] = self.softmax(cls_score)
outputs['rois'] = self.rcnn_data['rois'][0]
# Return the classification prob return outputs
outputs['cls_prob'] = self.softmax(cls_score)
return outputs
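One detail worth spelling out from FastRCNN.__init__: the spatial scales handed to RoIAlign/RoIPool are simply 1 / 2**level for each pyramid level the head reads from. Assuming the common ROI_MIN_LEVEL=2 and ROI_MAX_LEVEL=5:
# Spatial scales for FPN levels 2..5 (the level range is an assumption; the
# formula matches the list comprehension in FastRCNN.__init__).
roi_levels = list(range(2, 5 + 1))
spatial_scales = [1.0 / (2 ** level) for level in roi_levels]
print(spatial_scales)   # -> [0.25, 0.125, 0.0625, 0.03125]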
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1 from lib.modeling import conv1x1
from lib.modeling import conv3x3 from lib.modeling import conv3x3
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
class FPN(torch.nn.Module): class FPN(torch.nn.Module):
"""Feature Pyramid Networks for R-CNN and RetinaNet.""" """Feature Pyramid Networks for R-CNN and RetinaNet."""
def __init__(self, feature_dims): def __init__(self, feature_dims):
super(FPN, self).__init__() super(FPN, self).__init__()
self.C = torch.nn.ModuleList() self.C = torch.nn.ModuleList()
self.P = torch.nn.ModuleList() self.P = torch.nn.ModuleList()
self.apply_func = self.apply_on_rcnn self.apply_func = self.apply_on_rcnn
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1): for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True)) self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True))
self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True)) self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True))
if 'retinanet' in cfg.MODEL.TYPE: if 'retinanet' in cfg.MODEL.TYPE:
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1): for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM
self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True)) self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True))
self.apply_func = self.apply_on_retinanet self.apply_func = self.apply_on_retinanet
self.relu = torch.nn.ReLU(inplace=False) self.relu = torch.nn.ReLU(inplace=False)
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True) self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
self.reset_parameters() self.reset_parameters()
self.feature_dims = [cfg.FPN.DIM] self.feature_dims = [cfg.FPN.DIM]
def reset_parameters(self): def reset_parameters(self):
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_( torch.nn.init.kaiming_uniform_(
m.weight, m.weight,
a=1, # Fix the gain for [-127, 127] a=1, # Fix the gain for [-127, 127]
) # Xavier Initialization ) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0) torch.nn.init.constant_(m.bias, 0)
def apply_on_rcnn(self, features): def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)] outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Apply MaxPool for higher features # Apply MaxPool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1])) outputs.append(self.maxpool(outputs[-1]))
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.vision.ops.nn_resize( upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:]) fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output) fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input)) outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs return outputs
def apply_on_retinanet(self, features): def apply_on_retinanet(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL- min_lvl](fpn_input)] outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Add extra convolutions for higher features # Add extra convolutions for higher features
extra_input = features[-1] extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input)) outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl: if i != max_lvl:
extra_input = self.relu(outputs[-1]) extra_input = self.relu(outputs[-1])
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.vision.ops.nn_resize( upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:]) fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output) fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input)) outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs return outputs
def forward(self, features): def forward(self, features):
return self.apply_func(features) return self.apply_func(features)
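The heart of both apply_* paths is the top-down merge: resize the coarser map to the lateral map's size with nearest-neighbor interpolation, add it in place, then a 3x3 conv (self.P[...]) smooths the sum. A minimal numpy sketch of one such step, detached from the framework:
import numpy as np

coarse = np.arange(4, dtype=np.float32).reshape(2, 2)    # previous fpn_input (coarser level)
lateral = np.ones((4, 4), dtype=np.float32)              # 1x1-conv'd backbone feature
upscale = coarse.repeat(2, axis=0).repeat(2, axis=1)     # nearest-neighbor x2, like nn_resize
merged = lateral + upscale                               # becomes fpn_input for the next level down
print(merged)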
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py> # <https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import affine from lib.modeling import affine
from lib.modeling import conv1x1 from lib.modeling import conv1x1
from lib.modeling import conv3x3 from lib.modeling import conv3x3
class BasicBlock(torch.nn.Module): class BasicBlock(torch.nn.Module):
def __init__( def __init__(
self, self,
dim_in, dim_in,
dim_out, dim_out,
stride=1, stride=1,
downsample=None, downsample=None,
dropblock=None, dropblock=None,
): ):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride) self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out) self.bn1 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.conv2 = conv3x3(dim_out, dim_out) self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out) self.bn2 = affine(dim_out)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock = dropblock
def forward(self, x): def forward(self, x):
residual = x residual = x
out = self.conv1(x) out = self.conv1(x)
out = self.bn1(out) out = self.bn1(out)
out = self.relu(out) out = self.relu(out)
if self.dropblock is not None: if self.dropblock is not None:
out = self.dropblock(out) out = self.dropblock(out)
out = self.conv2(out) out = self.conv2(out)
out = self.bn2(out) out = self.bn2(out)
if self.dropblock is not None: if self.dropblock is not None:
residual = self.dropblock(residual) residual = self.dropblock(residual)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
out += residual out += residual
out = self.relu(out) out = self.relu(out)
return out return out
class Bottleneck(torch.nn.Module): class Bottleneck(torch.nn.Module):
# 1x64d => 0.25 (ResNet) # 1x64d => 0.25 (ResNet)
# 32x8d, 64x4d => 1.0 (ResNeXt) # 32x8d, 64x4d => 1.0 (ResNeXt)
contraction = cfg.RESNET.NUM_GROUPS \ contraction = cfg.RESNET.NUM_GROUPS \
* cfg.RESNET.GROUP_WIDTH / 256.0 * cfg.RESNET.GROUP_WIDTH / 256.0
def __init__( def __init__(
self, self,
dim_in, dim_in,
dim_out, dim_out,
stride=1, stride=1,
downsample=None, downsample=None,
dropblock=None, dropblock=None,
): ):
super(Bottleneck, self).__init__() super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction) dim = int(dim_out * self.contraction)
self.conv1 = conv1x1(dim_in, dim) self.conv1 = conv1x1(dim_in, dim)
self.bn1 = affine(dim) self.bn1 = affine(dim)
self.conv2 = conv3x3(dim, dim, stride=stride) self.conv2 = conv3x3(dim, dim, stride=stride)
self.bn2 = affine(dim) self.bn2 = affine(dim)
self.conv3 = conv1x1(dim, dim_out) self.conv3 = conv1x1(dim, dim_out)
self.bn3 = affine(dim_out) self.bn3 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock = dropblock
def forward(self, x): def forward(self, x):
residual = x residual = x
out = self.conv1(x) out = self.conv1(x)
out = self.bn1(out) out = self.bn1(out)
out = self.relu(out) out = self.relu(out)
out = self.conv2(out) out = self.conv2(out)
out = self.bn2(out) out = self.bn2(out)
out = self.relu(out) out = self.relu(out)
if self.dropblock is not None: if self.dropblock is not None:
out = self.dropblock(out) out = self.dropblock(out)
out = self.conv3(out) out = self.conv3(out)
out = self.bn3(out) out = self.bn3(out)
if self.dropblock is not None: if self.dropblock is not None:
residual = self.dropblock(residual) residual = self.dropblock(residual)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
out += residual out += residual
out = self.relu(out) out = self.relu(out)
return out return out
class ResNet(torch.nn.Module): class ResNet(torch.nn.Module):
def __init__(self, block, layers, filters): def __init__(self, block, layers, filters):
super(ResNet, self).__init__() super(ResNet, self).__init__()
self.dim_in, filters = filters[0], filters[1:] self.dim_in, filters = filters[0], filters[1:]
self.feature_dims = [self.dim_in] + filters self.feature_dims = [self.dim_in] + filters
self.conv1 = torch.nn.Conv2d( self.conv1 = torch.nn.Conv2d(
3, 64, 3, 64,
kernel_size=7, kernel_size=7,
stride=2, stride=2,
padding=3, padding=3,
bias=False, bias=False,
) )
self.bn1 = affine(self.dim_in) self.bn1 = affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = torch.nn.MaxPool2d(
kernel_size=3, kernel_size=3,
stride=2, stride=2,
padding=0, padding=0,
ceil_mode=True, ceil_mode=True,
) )
self.drop3 = torch.nn.DropBlock2d( self.drop3 = torch.nn.DropBlock2d(
kp=0.9, kp=0.9,
block_size=7, block_size=7,
alpha=0.25, alpha=0.25,
decrement=cfg.DROPBLOCK.DECREMENT decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None ) if cfg.DROPBLOCK.DROP_ON else None
self.drop4 = torch.nn.DropBlock2d( self.drop4 = torch.nn.DropBlock2d(
kp=0.9, kp=0.9,
block_size=7, block_size=7,
alpha=1.00, alpha=1.00,
decrement=cfg.DROPBLOCK.DECREMENT decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None ) if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0]) self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2) self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3) self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4) self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# The Kaiming Initialization # The Kaiming Initialization
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_( torch.nn.init.kaiming_normal_(
m.weight, m.weight,
nonlinearity='relu', nonlinearity='relu',
) )
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
m.weight.requires_grad = False m.weight.requires_grad = False
m._buffers['weight'] = m.weight m._buffers['weight'] = m.weight
del m._parameters['weight'] del m._parameters['weight']
if cfg.MODEL.FREEZE_AT > 0: if cfg.MODEL.FREEZE_AT > 0:
self.conv1.apply(freeze_func) self.conv1.apply(freeze_func)
for i in range(cfg.MODEL.FREEZE_AT, 1, -1): for i in range(cfg.MODEL.FREEZE_AT, 1, -1):
getattr(self, 'layer{}'.format(i - 1)).apply(freeze_func) getattr(self, 'layer{}'.format(i - 1)).apply(freeze_func)
def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None): def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None):
downsample = None downsample = None
if stride != 1 or self.dim_in != dim_out: if stride != 1 or self.dim_in != dim_out:
downsample = torch.nn.Sequential( downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride), conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out), affine(dim_out),
) )
layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)] layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)]
self.dim_in = dim_out self.dim_in = dim_out
for i in range(1, blocks): for i in range(1, blocks):
layers.append(block(dim_out, dim_out, dropblock=dropblock)) layers.append(block(dim_out, dim_out, dropblock=dropblock))
return torch.nn.Sequential(*layers) return torch.nn.Sequential(*layers)
def forward(self, x): def forward(self, x):
x = self.conv1(x) x = self.conv1(x)
x = self.bn1(x) x = self.bn1(x)
x = self.relu(x) x = self.relu(x)
x = self.maxpool(x) x = self.maxpool(x)
outputs = [x] outputs = [x]
outputs += [self.layer1(outputs[-1])] outputs += [self.layer1(outputs[-1])]
outputs += [self.layer2(outputs[-1])] outputs += [self.layer2(outputs[-1])]
outputs += [self.layer3(outputs[-1])] outputs += [self.layer3(outputs[-1])]
outputs += [self.layer4(outputs[-1])] outputs += [self.layer4(outputs[-1])]
return outputs return outputs
def resnet(depth): def resnet(depth):
if depth == 18: if depth == 18:
units = [2, 2, 2, 2] units = [2, 2, 2, 2]
elif depth == 34: elif depth == 34:
units = [3, 4, 6, 3] units = [3, 4, 6, 3]
elif depth == 50: elif depth == 50:
units = [3, 4, 6, 3] units = [3, 4, 6, 3]
elif depth == 101: elif depth == 101:
units = [3, 4, 23, 3] units = [3, 4, 23, 3]
elif depth == 152: elif depth == 152:
units = [3, 8, 36, 3] units = [3, 8, 36, 3]
elif depth == 200: elif depth == 200:
units = [3, 24, 36, 3] units = [3, 24, 36, 3]
elif depth == 269: elif depth == 269:
units = [3, 30, 48, 8] units = [3, 30, 48, 8]
else: else:
raise ValueError('Unsupported depth: %d' % depth) raise ValueError('Unsupported depth: %d' % depth)
block = Bottleneck if depth >= 50 else BasicBlock block = Bottleneck if depth >= 50 else BasicBlock
filters = [64, 256, 512, 1024, 2048] \ filters = [64, 256, 512, 1024, 2048] \
if depth >= 50 else [64, 64, 128, 256, 512] if depth >= 50 else [64, 64, 128, 256, 512]
return ResNet(block, units, filters) return ResNet(block, units, filters)
def make_resnet_18(): return resnet(18) def make_resnet_18(): return resnet(18)
def make_resnet_34(): return resnet(34) def make_resnet_34(): return resnet(34)
def make_resnet_50(): return resnet(50) def make_resnet_50(): return resnet(50)
def make_resnet_101(): return resnet(101) def make_resnet_101(): return resnet(101)
def make_resnet_152(): return resnet(152) def make_resnet_152(): return resnet(152)
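# ----------------------------------------------------------------------
# Illustrative sketch (not part of this diff): choosing a backbone by depth.
# resnet(18) / resnet(34) build BasicBlock stages (e.g. units [2, 2, 2, 2]
# for ResNet-18), while resnet(50) and deeper use Bottleneck stages
# (e.g. units [3, 4, 6, 3] for ResNet-50). The forward pass returns a list
# of feature maps: the stem output followed by the four residual stages.
#   backbone = make_resnet_50()     # same as resnet(50)
#   outputs = backbone(images)      # [stem, layer1, layer2, layer3, layer4]
# Here `images` stands for any input tensor of shape (N, 3, H, W); the name
# is assumed for illustration only.
# ----------------------------------------------------------------------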
...@@ -59,8 +59,7 @@ class RetinaNet(torch.nn.Module): ...@@ -59,8 +59,7 @@ class RetinaNet(torch.nn.Module):
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA, gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
) )
self.bbox_loss = torch.nn.SmoothL1Loss( self.bbox_loss = torch.nn.SmoothL1Loss(
beta=1. / 9., reduction='batch_size', beta=.11, reduction='batch_size')
)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -133,26 +132,22 @@ class RetinaNet(torch.nn.Module): ...@@ -133,26 +132,22 @@ class RetinaNet(torch.nn.Module):
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
ims_info=ims_info, ims_info=ims_info,
) )
return collections.OrderedDict({ return collections.OrderedDict([
'cls_loss': ('cls_loss', self.cls_loss(
self.cls_loss( cls_score, self.retinanet_data['labels'])),
cls_score, ('bbox_loss', self.bbox_loss(
self.retinanet_data['labels'], bbox_pred,
), self.retinanet_data['bbox_targets'],
'bbox_loss': self.retinanet_data['bbox_inside_weights'],
self.bbox_loss( self.retinanet_data['bbox_outside_weights'],
bbox_pred, )),
self.retinanet_data['bbox_targets'], ])
self.retinanet_data['bbox_inside_weights'],
self.retinanet_data['bbox_outside_weights'],
)
})
def forward(self, *args, **kwargs): def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict({'bbox_pred': bbox_pred}) outputs = collections.OrderedDict([('bbox_pred', bbox_pred)])
if self.training: if self.training:
outputs.update( outputs.update(
......
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1 from lib.modeling import conv1x1
from lib.modeling import conv3x3 from lib.modeling import conv3x3
class RPN(torch.nn.Module): class RPN(torch.nn.Module):
"""Region Proposal Networks for R-CNN series.""" """Region Proposal Networks for R-CNN series."""
def __init__(self, dim_in=256): def __init__(self, dim_in=256):
super(RPN, self).__init__() super(RPN, self).__init__()
################################## ##################################
# RPN outputs # # RPN outputs #
################################## ##################################
num_anchors = len(cfg.RPN.ASPECT_RATIOS) * ( num_anchors = len(cfg.RPN.ASPECT_RATIOS) * (
len(cfg.RPN.SCALES) if len(cfg.RPN.STRIDES) == 1 else 1) len(cfg.RPN.SCALES) if len(cfg.RPN.STRIDES) == 1 else 1)
self.output = conv3x3(dim_in, dim_in, bias=True) self.output = conv3x3(dim_in, dim_in, bias=True)
self.cls_score = conv1x1(dim_in, num_anchors, bias=True) self.cls_score = conv1x1(dim_in, num_anchors, bias=True)
self.bbox_pred = conv1x1(dim_in, num_anchors * 4, bias=True) self.bbox_pred = conv1x1(dim_in, num_anchors * 4, bias=True)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
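        # Illustrative note (not part of this diff): the anchor count per position.
        # With a single stride (the C4 setting) the RPN enumerates every
        # (aspect ratio, scale) pair, e.g. 3 ratios x 3 scales = 9 anchors, so
        # cls_score has 9 output channels and bbox_pred has 9 * 4 = 36. With
        # multiple strides (the FPN setting) only the aspect ratios are
        # enumerated per level, e.g. 3 anchors, since each level carries a
        # single scale. The "3 ratios x 3 scales" figures are typical defaults,
        # not values read from this diff.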
################################## ##################################
# RPN losses # # RPN losses #
################################## ##################################
if len(cfg.RPN.STRIDES) > 1: if len(cfg.RPN.STRIDES) > 1:
            # RPN with multiple strides (i.e. FPN)             # RPN with multiple strides (i.e. FPN)
from lib.fpn.layers.anchor_target_layer import AnchorTargetLayer from lib.fpn.anchor_target_layer import AnchorTargetLayer
else: else:
            # RPN with single stride (i.e. C4)             # RPN with single stride (i.e. C4)
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer
self.anchor_target_layer = AnchorTargetLayer() self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.BCEWithLogitsLoss() self.cls_loss = torch.nn.BCEWithLogitsLoss()
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1. / 9.) self.bbox_loss = torch.nn.SmoothL1Loss(
self.reset_parameters() beta=.11, reduction='batch_size')
self.reset_parameters()
def reset_parameters(self):
# Initialization for the RPN def reset_parameters(self):
# Weight ~ Normal(0, 0.01) # Initialization for the RPN
for m in self.modules(): # Weight ~ Normal(0, 0.01)
if isinstance(m, torch.nn.Conv2d): for m in self.modules():
torch.nn.init.normal_(m.weight, std=0.01) if isinstance(m, torch.nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.01)
def compute_outputs(self, features):
"""Compute the RPN logits. def compute_outputs(self, features):
"""Compute the RPN logits.
Parameters
---------- Parameters
features : sequence of dragon.vm.torch.Tensor ----------
The features of specific conv layers. features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
"""
# Compute rpn logits """
cls_score_wide, bbox_pred_wide = [], [] # Compute rpn logits
for feature in features: cls_score_wide, bbox_pred_wide = [], []
x = self.relu(self.output(feature)) for feature in features:
if len(features) > 1: x = self.relu(self.output(feature))
cls_score = self.cls_score(x).view(0, -1) if len(features) > 1:
bbox_pred = self.bbox_pred(x).view(0, 4, -1) cls_score = self.cls_score(x).view(0, -1)
else: bbox_pred = self.bbox_pred(x).view(0, 4, -1)
cls_score = self.cls_score(x) else:
bbox_pred = self.bbox_pred(x) cls_score = self.cls_score(x)
cls_score_wide.append(cls_score) bbox_pred = self.bbox_pred(x)
bbox_pred_wide.append(bbox_pred) cls_score_wide.append(cls_score)
bbox_pred_wide.append(bbox_pred)
if len(features) > 1:
# Concat them if necessary if len(features) > 1:
return torch.cat(cls_score_wide, dim=1), \ # Concat them if necessary
torch.cat(bbox_pred_wide, dim=2) return torch.cat(cls_score_wide, dim=1), \
else: torch.cat(bbox_pred_wide, dim=2)
return cls_score_wide[0], bbox_pred_wide[0] else:
return cls_score_wide[0], bbox_pred_wide[0]
def compute_losses(
self, def compute_losses(
features, self,
cls_score, features,
bbox_pred, cls_score,
gt_boxes, bbox_pred,
ims_info, gt_boxes,
): ims_info,
"""Compute the RPN classification loss and regression loss. ):
"""Compute the RPN classification loss and regression loss.
Parameters
---------- Parameters
features : sequence of dragon.vm.torch.Tensor ----------
The features of specific conv layers. features : sequence of dragon.vm.torch.Tensor
cls_score : dragon.vm.torch.Tensor The features of specific conv layers.
The (binary) classification logits. cls_score : dragon.vm.torch.Tensor
bbox_pred : dragon.vm.torch.Tensor The (binary) classification logits.
The bbox regression logits. bbox_pred : dragon.vm.torch.Tensor
gt_boxes : numpy.ndarray The bbox regression logits.
The packed ground-truth boxes. gt_boxes : numpy.ndarray
ims_info : numpy.ndarray The packed ground-truth boxes.
The information of input images. ims_info : numpy.ndarray
The information of input images.
"""
self.rpn_data = \ """
self.anchor_target_layer( self.rpn_data = \
features=features, self.anchor_target_layer(
gt_boxes=gt_boxes, features=features,
ims_info=ims_info, gt_boxes=gt_boxes,
) ims_info=ims_info,
return collections.OrderedDict({ )
'rpn_cls_loss': return collections.OrderedDict([
self.cls_loss(cls_score, self.rpn_data['labels']), ('rpn_cls_loss', self.cls_loss(
'rpn_bbox_loss': cls_score, self.rpn_data['labels'])),
self.bbox_loss( ('rpn_bbox_loss', self.bbox_loss(
bbox_pred, bbox_pred,
self.rpn_data['bbox_targets'], self.rpn_data['bbox_targets'],
self.rpn_data['bbox_inside_weights'], self.rpn_data['bbox_inside_weights'],
self.rpn_data['bbox_outside_weights'], self.rpn_data['bbox_outside_weights'],
) )),
}) ])
def forward(self, *args, **kwargs): def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict({ outputs = collections.OrderedDict([
'rpn_cls_score': cls_score, ('rpn_cls_score', cls_score),
'rpn_bbox_pred': bbox_pred, ('rpn_bbox_pred', bbox_pred),
}) ])
if self.training: if self.training:
outputs.update( outputs.update(
self.compute_losses( self.compute_losses(
kwargs['features'], kwargs['features'],
cls_score, cls_score,
bbox_pred, bbox_pred,
kwargs['gt_boxes'], kwargs['gt_boxes'],
kwargs['ims_info'], kwargs['ims_info'],
) )
) )
return outputs return outputs
...@@ -136,32 +136,29 @@ class SSD(torch.nn.Module): ...@@ -136,32 +136,29 @@ class SSD(torch.nn.Module):
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
) )
) )
return collections.OrderedDict({ return collections.OrderedDict([
# A compensating factor of 4.0 is used # A compensating factor of 4.0 is used
# As we normalize both the pos and neg samples # As we normalize both the pos and neg samples
'cls_loss': ('cls_loss', self.cls_loss(
self.cls_loss( cls_score.view(-1, cfg.MODEL.NUM_CLASSES),
cls_score.view(-1, cfg.MODEL.NUM_CLASSES), self.ssd_data['labels']) * 4.),
self.ssd_data['labels'] ('bbox_loss', self.bbox_loss(
) * 4., bbox_pred,
'bbox_loss': self.ssd_data['bbox_targets'],
self.bbox_loss( self.ssd_data['bbox_inside_weights'],
bbox_pred, self.ssd_data['bbox_outside_weights'],
self.ssd_data['bbox_targets'], )),
self.ssd_data['bbox_inside_weights'], ])
self.ssd_data['bbox_outside_weights'],
)
})
def forward(self, *args, **kwargs): def forward(self, *args, **kwargs):
prior_boxes = self.prior_box_layer(kwargs['features']) prior_boxes = self.prior_box_layer(kwargs['features'])
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict({ outputs = collections.OrderedDict([
'prior_boxes': prior_boxes, ('bbox_pred', bbox_pred),
'bbox_pred': bbox_pred, ('prior_boxes', prior_boxes),
}) ])
if self.training: if self.training:
outputs.update( outputs.update(
......
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/nms_wrapper.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/nms_wrapper.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils import logger from lib.utils import logger
try: try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms
except ImportError as e: except ImportError as e:
print('Failed to import cpu nms. Error: {0}'.format(str(e))) print('Failed to import cpu nms. Error: {0}'.format(str(e)))
try: try:
from lib.nms.gpu_nms import gpu_nms from lib.nms.gpu_nms import gpu_nms
except ImportError as e: except ImportError as e:
print('Failed to import gpu nms. Error: {0}'.format(str(e))) print('Failed to import gpu nms. Error: {0}'.format(str(e)))
def nms(detections, thresh, force_cpu=False): def nms(detections, thresh, force_cpu=False):
"""Perform either CPU or GPU Hard-NMS.""" """Perform either CPU or GPU Hard-NMS."""
if detections.shape[0] == 0: if detections.shape[0] == 0:
return [] return []
if cfg.USE_GPU_NMS and not force_cpu: if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID) return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else: else:
return cpu_nms(detections, thresh) return cpu_nms(detections, thresh)
def soft_nms( def soft_nms(
detections, detections,
thresh, thresh,
method='linear', method='linear',
sigma=0.5, sigma=0.5,
score_thresh=0.001, score_thresh=0.001,
): ):
"""Perform CPU Soft-NMS.""" """Perform CPU Soft-NMS."""
if detections.shape[0] == 0: if detections.shape[0] == 0:
return [] return []
methods = {'hard': 0, 'linear': 1, 'gaussian': 2} methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods: if method not in methods:
logger.fatal('Unknown soft nms method: {}'.format(method)) logger.fatal('Unknown soft nms method: {}'.format(method))
return cpu_soft_nms( return cpu_soft_nms(
detections, detections,
thresh, thresh,
methods[method], methods[method],
sigma, sigma,
score_thresh, score_thresh,
) )
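# Illustrative usage (sketch, not part of this diff). Each detection row is
# (x1, y1, x2, y2, score); nms returns the indices of the kept rows, and
# soft_nms additionally rescores overlapping boxes before thresholding.
#   import numpy as np
#   dets = np.array([[10., 10., 60., 60., 0.9],
#                    [12., 12., 62., 62., 0.8],
#                    [100., 100., 150., 150., 0.7]], dtype=np.float32)
#   keep = nms(dets, thresh=0.5, force_cpu=True)          # -> [0, 2]
#   keep = soft_nms(dets, thresh=0.5, method='gaussian')  # rescored survivors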
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
syntax = "proto2";
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
optional int32 width = 3;
optional bytes data = 4;
optional int32 label = 5;
repeated float float_data = 6;
optional bool encoded = 7 [default = false];
repeated int32 labels = 8;
}
message Annotation {
optional float x1 = 1;
optional float y1 = 2;
optional float x2 = 3;
optional float y2 = 4;
optional string name = 5;
optional bool difficult = 6 [default = false];
optional string mask = 7;
}
message AnnotatedDatum {
optional Datum datum = 1;
optional string filename = 2;
repeated Annotation annotation = 3;
}
...@@ -13,5 +13,5 @@ from __future__ import absolute_import ...@@ -13,5 +13,5 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer from lib.faster_rcnn.data_layer import DataLayer
from lib.retinanet.layers.anchor_target_layer import AnchorTargetLayer from lib.retinanet.anchor_target_layer import AnchorTargetLayer
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2 from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module): class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets.""" """Assign anchors to ground-truth targets."""
def __init__(self): def __init__(self):
super(AnchorTargetLayer, self).__init__() super(AnchorTargetLayer, self).__init__()
# Load the basic configs # Load the basic configs
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
anchor_scale = cfg.RETINANET.ANCHOR_SCALE anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)] self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors # Generate base anchors
self.base_anchors = [] self.base_anchors = []
for stride in self.strides: for stride in self.strides:
sizes = [stride * anchor_scale * sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave))) (2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)] for octave in range(scales_per_octave)]
self.base_anchors.append( self.base_anchors.append(
generate_anchors_v2( generate_anchors_v2(
stride=stride, stride=stride,
ratios=self.ratios, ratios=self.ratios,
sizes=sizes, sizes=sizes,
)) ))
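        # Worked example (illustrative; anchor_scale = 4 and
        # scales_per_octave = 3 are typical RetinaNet defaults, not values
        # read from this diff): the stride-8 level gets
        # sizes = 8 * 4 * 2 ** (o / 3) for o in {0, 1, 2},
        # i.e. roughly [32.0, 40.3, 50.8]; combined with 3 aspect ratios
        # this yields 9 base anchors per pyramid level.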
def forward(self, features, gt_boxes, ims_info): def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.""" """Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal( logger.fatal(
'Input {} images, got {} slices of gt boxes.' 'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide)) .format(num_images, len(gt_boxes_wide))
) )
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
all_anchors, total_anchors = [], 0 all_anchors, total_anchors = [], 0
for i in range(len(self.strides)): for i in range(len(self.strides)):
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i] shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i] shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get # cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0] A = self.base_anchors[i].shape[0]
K = shifts.shape[0] K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) + anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4] # [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2)) anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4)) anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors) all_anchors.append(anchors)
total_anchors += anchors.shape[0] total_anchors += anchors.shape[0]
all_anchors = np.concatenate(all_anchors, axis=0) all_anchors = np.concatenate(all_anchors, axis=0)
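        # Shape check (illustrative): for a stride-8 feature map of
        # 100 x 152 cells, K = 15200 shifts; broadcasting (1, A, 4) with
        # (K, 1, 4) for A = 9 base anchors gives (K, A, 4), i.e.
        # 15200 * 9 = 136800 shifted anchors appended for that level.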
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32) labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32) bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32) bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32) bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
anchors = all_anchors anchors = all_anchors
inds_inside = np.arange(all_anchors.shape[0]) inds_inside = np.arange(all_anchors.shape[0])
num_inside = len(inds_inside) num_inside = len(inds_inside)
for ix in range(num_images): for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label) # GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix] gt_boxes = gt_boxes_wide[ix]
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32) labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1) labels.fill(-1)
# Overlaps between the anchors and the gt boxes # Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float),
) )
argmax_overlaps = overlaps.argmax(axis=1) argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps] max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
# fg label: for each gt, anchor with highest overlap # fg label: for each gt, anchor with highest overlap
gt_argmax_overlaps = overlaps.argmax(axis=0) gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
gt_inds = argmax_overlaps[gt_argmax_overlaps] gt_inds = argmax_overlaps[gt_argmax_overlaps]
labels[gt_argmax_overlaps] = gt_boxes[gt_inds, 4] labels[gt_argmax_overlaps] = gt_boxes[gt_inds, 4]
# fg label: above threshold IOU # fg label: above threshold IOU
inds = max_overlaps >= cfg.RETINANET.POSITIVE_OVERLAP inds = max_overlaps >= cfg.RETINANET.POSITIVE_OVERLAP
gt_inds = argmax_overlaps[inds] gt_inds = argmax_overlaps[inds]
labels[inds] = gt_boxes[gt_inds, 4] labels[inds] = gt_boxes[gt_inds, 4]
fg_inds = np.where(labels > 0)[0] fg_inds = np.where(labels > 0)[0]
# bg label: below threshold IOU # bg label: below threshold IOU
labels[max_overlaps < cfg.RETINANET.NEGATIVE_OVERLAP] = 0 labels[max_overlaps < cfg.RETINANET.NEGATIVE_OVERLAP] = 0
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32) bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform( bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4]) anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4])
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.)) bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1) bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1)
labels_wide[ix, inds_inside] = labels labels_wide[ix, inds_inside] = labels
bbox_targets_wide[ix, inds_inside] = bbox_targets bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors)) labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1)) bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1)) bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1)) bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return { return {
'labels': blob_to_tensor(labels), 'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets), 'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights), 'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights), 'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
} }
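        # Returned blob shapes (illustrative summary, not part of this diff):
        #   labels               -> (num_images, total_anchors)
        #   bbox_targets         -> (num_images, 4, total_anchors)
        #   bbox_inside_weights  -> (num_images, 4, total_anchors)
        #   bbox_outside_weights -> (num_images, 4, total_anchors)
        # The (0, 2, 1) transpose above puts the 4 box coordinates on axis 1
        # so the targets line up with (N, 4, A)-shaped regression logits.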
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.nms.nms_wrapper import nms from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob from lib.utils.blob import tensor_to_blob
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.timer import Timer from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image): def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales.""" """Detect a image, with single or multiple scales."""
# Prepare images # Prepare images
ims, ims_scale = scale_image(raw_image) ims, ims_scale = scale_image(raw_image)
# Prepare blobs # Prepare blobs
blobs = {'data': im_list_to_blob(ims)} blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([ blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale] list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32, for im_scale in ims_scale], dtype=np.float32,
) )
blobs['data'] = torch.from_numpy(blobs['data']) blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward # Do Forward
with torch.no_grad(): with torch.no_grad():
outputs = detector.forward(inputs=blobs) outputs = detector.forward(inputs=blobs)
# Unpack results # Unpack results
return tensor_to_blob(outputs['detections'])[:, 1:] return tensor_to_blob(outputs['detections'])[:, 1:]
def ims_detect(detector, raw_images): def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales.""" """Detect images, with single or multiple scales."""
# Prepare images # Prepare images
ims, ims_scale = scale_image(raw_images[0]) ims, ims_scale = scale_image(raw_images[0])
num_scales = len(ims_scale) num_scales = len(ims_scale)
ims_shape = [im.shape for im in raw_images] ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)): for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx]) ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext ims += ims_ext
ims_scale += ims_scale_ext ims_scale += ims_scale_ext
# Prepare blobs # Prepare blobs
blobs = {'data': im_list_to_blob(ims)} blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([ blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale] list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32, for im_scale in ims_scale], dtype=np.float32,
) )
blobs['data'] = torch.from_numpy(blobs['data']) blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward # Do Forward
with torch.no_grad(): with torch.no_grad():
outputs = detector.forward(inputs=blobs) outputs = detector.forward(inputs=blobs)
# Unpack results # Unpack results
results = tensor_to_blob(outputs['detections']) results = tensor_to_blob(outputs['detections'])
detections_wide = [[] for _ in range(len(ims_shape))] detections_wide = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)): for i in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == i)[0] indices = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:] detections = results[indices, 1:]
detections_wide[i // num_scales].append(detections) detections_wide[i // num_scales].append(detections)
for i in range(len(ims_shape)): for i in range(len(ims_shape)):
detections_wide[i] = np.vstack(detections_wide[i]) \ detections_wide[i] = np.vstack(detections_wide[i]) \
if len(detections_wide[i]) > 1 else detections_wide[i][0] if len(detections_wide[i]) > 1 else detections_wide[i][0]
return detections_wide return detections_wide
def test_net(net, server): def test_net(net, server):
# Load settings # Load settings
classes = server.classes classes = server.classes
num_images = server.num_images num_images = server.num_images
num_classes = server.num_classes num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()} _t = {'im_detect': Timer(), 'misc': Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH): for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths # Collect raw images and ground-truths
image_ids, raw_images = [], [] image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH): for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue if batch_idx + item_idx >= num_images: continue
image_id, raw_image = server.get_image() image_id, raw_image = server.get_image()
image_ids.append(image_id) image_ids.append(image_id)
raw_images.append(raw_image) raw_images.append(raw_image)
# Run detecting on specific scales # Run detecting on specific scales
_t['im_detect'].tic() _t['im_detect'].tic()
if cfg.TEST.IMS_PER_BATCH > 1: if cfg.TEST.IMS_PER_BATCH > 1:
results = ims_detect(net, raw_images) results = ims_detect(net, raw_images)
else: else:
results = [im_detect(net, raw_images[0])] results = [im_detect(net, raw_images[0])]
_t['im_detect'].toc() _t['im_detect'].toc()
# Post-Processing # Post-Processing
_t['misc'].tic() _t['misc'].tic()
for item_idx, detections in enumerate(results): for item_idx, detections in enumerate(results):
i = batch_idx + item_idx i = batch_idx + item_idx
boxes_this_image = [[]] boxes_this_image = [[]]
# {x1, y1, x2, y2, score, cls} # {x1, y1, x2, y2, score, cls}
detections = np.array(detections) detections = np.array(detections)
for j in range(1, num_classes): for j in range(1, num_classes):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0] cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4] cls_boxes = detections[cls_indices, 0:4]
cls_scores = detections[cls_indices, 4] cls_scores = detections[cls_indices, 4]
cls_detections = np.hstack(( cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])) \ cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False) .astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS: if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms( keep = soft_nms(
cls_detections, cls_detections,
cfg.TEST.NMS, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD, method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA, sigma=cfg.TEST.SOFT_NMS_SIGMA,
) )
else: else:
keep = nms( keep = nms(
cls_detections, cls_detections,
cfg.TEST.NMS, cfg.TEST.NMS,
force_cpu=True, force_cpu=True,
) )
cls_detections = cls_detections[keep, :] cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections) boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE: if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image( vis_one_image(
raw_images[item_idx], raw_images[item_idx],
classes, classes,
boxes_this_image, boxes_this_image,
thresh=cfg.VIS_TH, thresh=cfg.VIS_TH,
box_alpha=1., box_alpha=1.,
show_class=True, show_class=True,
filename=server.get_save_filename(image_ids[item_idx]), filename=server.get_save_filename(image_ids[item_idx]),
) )
# Limit to max_per_image detections *over all classes* # Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0: if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = [] image_scores = []
for j in range(1, num_classes): for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: if len(all_boxes[j][i]) < 1:
continue continue
image_scores.append(all_boxes[j][i][:, -1]) image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: if len(image_scores) > 0:
image_scores = np.hstack(image_scores) image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM: if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM] image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes): for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :] all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc() _t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH, .format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time, num_images,
_t['misc'].average_time), end='') _t['im_detect'].average_time,
_t['misc'].average_time),
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<') end='')
print('Evaluating detections') print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
server.evaluate_detections(all_boxes)
print('Evaluating detections')
server.evaluate_detections(all_boxes)
...@@ -13,8 +13,8 @@ from __future__ import absolute_import ...@@ -13,8 +13,8 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.ssd.layers.data_layer import DataLayer from lib.ssd.data_layer import DataLayer
from lib.ssd.layers.hard_mining_layer import HardMiningLayer from lib.ssd.hard_mining_layer import HardMiningLayer
from lib.ssd.layers.multibox_layer import MultiBoxMatchLayer from lib.ssd.multibox_layer import MultiBoxMatchLayer
from lib.ssd.layers.multibox_layer import MultiBoxTargetLayer from lib.ssd.multibox_layer import MultiBoxTargetLayer
from lib.ssd.layers.priorbox_layer import PriorBoxLayer from lib.ssd.priorbox_layer import PriorBoxLayer
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from lib.core.config import cfg
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self._img_blob_size = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
self.q_in = self.q_out = None
self.daemon = True
def get(self):
img_blob, boxes_blob = np.zeros(self._img_blob_size, 'uint8'), []
for i in range(cfg.TRAIN.IMS_PER_BATCH):
img_blob[i], gt_boxes = self.q_in.get()
# Pack the boxes by adding the index of images
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), np.float32)
boxes[:, :gt_boxes.shape[1]] = gt_boxes
boxes[:, -1] = i
boxes_blob.append(boxes)
return {
'data': img_blob,
'gt_boxes': np.concatenate(boxes_blob, 0),
}
def run(self):
while True:
self.q_out.put(self.get())
...@@ -13,54 +13,69 @@ from __future__ import absolute_import ...@@ -13,54 +13,69 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from multiprocessing import Queue import multiprocessing as mp
import time import time
import dragon import dragon
import pprint import dragon.vm.torch as torch
import numpy as np
from lib.faster_rcnn.data.data_reader import DataReader from lib.core.config import cfg
from lib.ssd.data.data_transformer import DataTransformer from lib.datasets.factory import get_imdb
from lib.ssd.data.blob_fetcher import BlobFetcher from lib.ssd.data_transformer import DataTransformer
from lib.utils import logger from lib.utils import logger
class DataBatch(object): class DataLayer(torch.nn.Module):
"""DataBatch aims to prefetch data by ``Triple-Buffering``. """Generate a mini-batch of data."""
    It takes full advantage of the Process/Thread of Python,     def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
    which provides remarkable I/O speed-up for scalable distributed training.     def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
"""
def __init__(self, **kwargs): def __init__(self, **kwargs):
"""Construct a ``DataBatch``. """Construct a ``DataBatch``.
Parameters Parameters
---------- ----------
source : str dataset : lambda
The path of database. The creator of a dataset.
shuffle : bool, optional, default=False shuffle : bool, optional, default=False
Whether to shuffle the data. Whether to shuffle the data.
num_chunks : int, optional, default=2048 num_chunks : int, optional, default=0
The number of chunks to split. The number of chunks to split.
batch_size : int, optional, default=128 batch_size : int, optional, default=32
The size of a mini-batch. The size of a mini-batch.
prefetch : int, optional, default=5 prefetch : int, optional, default=5
The prefetch count. The prefetch count.
""" """
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
# Init mpi # Distributed settings
global_rank, local_rank, group_size = 0, 0, 1 rank, group_size = 0, 1
if dragon.mpi.is_init(): process_group = dragon.distributed.get_default_process_group()
group = dragon.mpi.is_parallel() if process_group is not None and kwargs.get(
if group is not None: # DataParallel 'phase', 'TRAIN') == 'TRAIN':
global_rank = dragon.mpi.rank() group_size = process_group.size
group_size = len(group) rank = dragon.distributed.get_rank(process_group)
for i, node in enumerate(group):
if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size kwargs['group_size'] = group_size
# Configuration # Configuration
...@@ -77,63 +92,50 @@ class DataBatch(object): ...@@ -77,63 +92,50 @@ class DataBatch(object):
self._num_transformers = min( self._num_transformers = min(
self._num_transformers, self._max_transformers) self._num_transformers, self._max_transformers)
# Init queues # Initialize queues
self.Q1 = Queue(self._prefetch * self._num_readers * self._batch_size) num_batches = self._prefetch * self._num_readers
self.Q2 = Queue(self._prefetch * self._num_readers * self._batch_size) self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q3 = Queue(self._prefetch * self._num_readers) self.Q2 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Init readers # Initialize readers
self._readers = [] self._readers = []
for i in range(self._num_readers): for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers part_idx, num_parts = i, self._num_readers
num_parts *= group_size num_parts *= group_size
part_idx += local_rank * self._num_readers part_idx += rank * self._num_readers
self._readers[i]._num_parts = num_parts self._readers.append(dragon.io.DataReader(
self._readers[i]._part_idx = part_idx num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._rng_seed += part_idx self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start() self._readers[i].start()
time.sleep(0.1) time.sleep(0.1)
# Init transformers # Initialize transformers
self._transformers = [] self._transformers = []
for i in range(self._num_transformers): for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs) transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers) transformer._rng_seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1 transformer.q_in, transformer.q_out = self.Q1, self.Q2
transformer.q_out = self.Q2
transformer.start() transformer.start()
self._transformers.append(transformer) self._transformers.append(transformer)
time.sleep(0.1) time.sleep(0.1)
# Init blob fetchers # Initialize batch-producer
self._fetchers = [] self.start()
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.q_in = self.Q2
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Prevent to echo multiple nodes
if local_rank == 0:
self.echo()
# Register cleanup callbacks
def cleanup(): def cleanup():
def terminate(processes): def terminate(processes):
for process in processes: for process in processes:
process.terminate() process.terminate()
process.join() process.join()
terminate(self._fetchers) terminate([self])
logger.info('Terminating BlobFetcher ......') logger.info('Terminate DataBatch.')
terminate(self._transformers) terminate(self._transformers)
logger.info('Terminating DataTransformer ......') logger.info('Terminate DataTransformer.')
terminate(self._readers) terminate(self._readers)
logger.info('Terminating DataReader......') logger.info('Terminate DataReader.')
import atexit import atexit
atexit.register(cleanup) atexit.register(cleanup)
...@@ -149,14 +151,24 @@ class DataBatch(object): ...@@ -149,14 +151,24 @@ class DataBatch(object):
""" """
return self.Q3.get() return self.Q3.get()
def echo(self): def run(self):
"""Print I/O Information.""" """Start the process to produce batches."""
print('---------------------------------------------------------') image_batch_shape = (
print('BatchFetcher({} Threads), Using config:'.format( cfg.TRAIN.IMS_PER_BATCH,
self._num_readers + self._num_transformers + self._num_fetchers)) cfg.SSD.RESIZE.HEIGHT,
params = {'queue_size': self._prefetch, cfg.SSD.RESIZE.WIDTH, 3,
'n_readers': self._num_readers, )
'n_transformers': self._num_transformers,
'n_fetchers': self._num_fetchers} while True:
pprint.pprint(params) boxes_to_pack = []
print('---------------------------------------------------------') image_batch = np.zeros(image_batch_shape, 'uint8')
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
image_batch[image_index], gt_boxes = self.Q2.get()
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
boxes[:, :gt_boxes.shape[1]], boxes[:, -1] = gt_boxes, image_index
boxes_to_pack.append(boxes)
self.Q3.put({
'data': image_batch,
'gt_boxes': np.concatenate(boxes_to_pack),
})
...@@ -13,14 +13,14 @@ from __future__ import absolute_import ...@@ -13,14 +13,14 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import cv2
import multiprocessing import multiprocessing
import cv2
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.proto import anno_pb2 as pb from lib.ssd import transforms
from lib.ssd.data import transforms from lib.utils.boxes import flip_boxes
from lib.utils import logger
class DataTransformer(multiprocessing.Process): class DataTransformer(multiprocessing.Process):
...@@ -41,38 +41,41 @@ class DataTransformer(multiprocessing.Process): ...@@ -41,38 +41,41 @@ class DataTransformer(multiprocessing.Process):
self.q_in = self.q_out = None self.q_in = self.q_out = None
self.daemon = True self.daemon = True
def make_roi_dict(self, ann_datum, flip=False): def make_roi_dict(self, example, flip=False):
annotations = ann_datum.annotation
n_objects = 0 n_objects = 0
if not self._use_diff: if not self._use_diff:
for ann in annotations: for obj in example['object']:
if not ann.difficult: n_objects += 1 if obj.get('difficult', 0) == 0:
else: n_objects = len(annotations) n_objects += 1
else:
n_objects = len(example['object'])
roi_dict = { roi_dict = {
'width': ann_datum.datum.width, 'width': example['width'],
'height': ann_datum.datum.height, 'height': example['height'],
'gt_classes': np.zeros((n_objects,), dtype=np.int32), 'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), dtype=np.float32), 'boxes': np.zeros((n_objects, 4), 'float32'),
'normalized_boxes': np.zeros((n_objects, 4), dtype=np.float32), 'normalized_boxes': np.zeros((n_objects, 4), 'float32'),
} }
rec_idx = 0 # Filter the difficult instances
for ann in annotations: object_idx = 0
if not self._use_diff and ann.difficult: for obj in example['object']:
if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue continue
roi_dict['boxes'][rec_idx, :] = [ roi_dict['boxes'][object_idx, :] = [
max(0, ann.x1), max(0, obj['xmin']),
max(0, ann.y1), max(0, obj['ymin']),
min(ann.x2, ann_datum.datum.width - 1), min(obj['xmax'], example['width'] - 1),
min(ann.y2, ann_datum.datum.height - 1), min(obj['ymax'], example['height'] - 1),
] ]
roi_dict['gt_classes'][rec_idx] = \ roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[ann.name] self._class_to_ind[obj['name']]
rec_idx += 1 object_idx += 1
if flip: if flip:
roi_dict['boxes'] = _flip_boxes( roi_dict['boxes'] = flip_boxes(
roi_dict['boxes'], roi_dict['width']) roi_dict['boxes'], roi_dict['width'])
roi_dict['boxes'][:, 0::2] /= roi_dict['width'] roi_dict['boxes'][:, 0::2] /= roi_dict['width']
...@@ -80,26 +83,19 @@ class DataTransformer(multiprocessing.Process): ...@@ -80,26 +83,19 @@ class DataTransformer(multiprocessing.Process):
return roi_dict return roi_dict
def get(self, serialized): def get(self, example):
ann_datum = pb.AnnotatedDatum() img = np.frombuffer(example['content'], np.uint8)
ann_datum.ParseFromString(serialized) img = cv2.imdecode(img, -1)
img_datum = ann_datum.datum
img = np.fromstring(img_datum.data, np.uint8)
if img_datum.encoded is True:
img = cv2.imdecode(img, -1)
else:
h, w = img_datum.height, img_datum.width
img = img.reshape((h, w, img_datum.channels))
# Flip # Flip
flip = False flip = False
if self._mirror: if self._mirror:
if np.random.randint(0, 2) > 0: if np.random.randint(2) > 0:
img = img[:, ::-1, :] img = img[:, ::-1, :]
flip = True flip = True
# Datum -> RoIDB # Example -> RoIDict
roi_dict = self.make_roi_dict(ann_datum, flip) roi_dict = self.make_roi_dict(example, flip)
# Post-Process for gt boxes # Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}] # Shape like: [num_objects, {x1, y1, x2, y2, cls}]
...@@ -120,19 +116,7 @@ class DataTransformer(multiprocessing.Process): ...@@ -120,19 +116,7 @@ class DataTransformer(multiprocessing.Process):
def run(self): def run(self):
np.random.seed(self._rng_seed) np.random.seed(self._rng_seed)
while True: while True:
serialized = self.q_in.get() outputs = self.get(self.q_in.get())
im, gt_boxes = self.get(serialized) if len(outputs[1]) < 1:
if len(gt_boxes) < 1: continue # Ignore the non-object image
continue self.q_out.put(outputs)
self.q_out.put((im, gt_boxes))
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
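# Sketch of the SeetaRecord example consumed by DataTransformer.get() above
# (illustrative; field values are made up, the keys follow the accesses in
# make_roi_dict and get):
#   example = {
#       'content': b'...jpeg bytes...',
#       'width': 500, 'height': 375,
#       'object': [
#           {'name': 'dog', 'xmin': 48., 'ymin': 240.,
#            'xmax': 195., 'ymax': 371., 'difficult': 0},
#       ],
#   }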
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
def generate_anchors(min_sizes, max_sizes, ratios): def generate_anchors(min_sizes, max_sizes, ratios):
""" """
Generate anchor (reference) windows by enumerating Generate anchor (reference) windows by enumerating
aspect ratios, min_sizes, max_sizes wrt a reference ctr (x, y, w, h). aspect ratios, min_sizes, max_sizes wrt a reference ctr (x, y, w, h).
""" """
total_anchors = [] total_anchors = []
for idx, min_size in enumerate(min_sizes): for idx, min_size in enumerate(min_sizes):
        # Note that SSD assumes it is a ctr-anchor         # Note that SSD assumes it is a ctr-anchor
base_anchor = np.array([0, 0, min_size, min_size]) base_anchor = np.array([0, 0, min_size, min_size])
anchors = _ratio_enum(base_anchor, ratios) anchors = _ratio_enum(base_anchor, ratios)
if len(max_sizes) > 0: if len(max_sizes) > 0:
max_size = max_sizes[idx] max_size = max_sizes[idx]
_anchors = anchors[0].reshape((1, 4)) _anchors = anchors[0].reshape((1, 4))
_anchors = np.vstack([_anchors, _max_size_enum( _anchors = np.vstack([_anchors, _max_size_enum(
base_anchor, min_size, max_size)]) base_anchor, min_size, max_size)])
anchors = np.vstack([_anchors, anchors[1:]]) anchors = np.vstack([_anchors, anchors[1:]])
total_anchors.append(anchors) total_anchors.append(anchors)
return np.vstack(total_anchors) return np.vstack(total_anchors)
def _whctrs(anchor): def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window).""" """Return width, height, x center, and y center for an anchor (window)."""
w, h = anchor[2], anchor[3] w, h = anchor[2], anchor[3]
x_ctr, y_ctr = anchor[0], anchor[1] x_ctr, y_ctr = anchor[0], anchor[1]
return w, h, x_ctr, y_ctr return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr): def _mkanchors(ws, hs, x_ctr, y_ctr):
""" """
Given a vector of widths (ws) and heights (hs) around a center Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows). (x_ctr, y_ctr), output a set of anchors (windows).
""" """
ws = ws[:, np.newaxis] ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis] hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * ws, anchors = np.hstack((x_ctr - 0.5 * ws,
y_ctr - 0.5 * hs, y_ctr - 0.5 * hs,
x_ctr + 0.5 * ws, x_ctr + 0.5 * ws,
y_ctr + 0.5 * hs)) y_ctr + 0.5 * hs))
return anchors return anchors
def _ratio_enum(anchor, ratios): def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor.""" """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor) w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h size = w * h
size_ratios = size / ratios size_ratios = size / ratios
hs = np.round(np.sqrt(size_ratios)) hs = np.round(np.sqrt(size_ratios))
ws = np.round(hs * ratios) ws = np.round(hs * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr) anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors return anchors
def _max_size_enum(base_anchor, min_size, max_size): def _max_size_enum(base_anchor, min_size, max_size):
"""Enumerate a anchor for max_size wrt base_anchor.""" """Enumerate a anchor for max_size wrt base_anchor."""
w, h, x_ctr, y_ctr = _whctrs(base_anchor) w, h, x_ctr, y_ctr = _whctrs(base_anchor)
ws = hs = np.sqrt([min_size * max_size]) ws = hs = np.sqrt([min_size * max_size])
anchors = _mkanchors(ws, hs, x_ctr, y_ctr) anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors return anchors
if __name__ == '__main__': if __name__ == '__main__':
print(generate_anchors(min_sizes=[30], max_sizes=[60], ratios=[1, 0.5, 2])) print(generate_anchors(min_sizes=[30], max_sizes=[60], ratios=[1, 0.5, 2]))
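For the demo arguments above, the enumeration yields one anchor for the first ratio at min_size, one square anchor at sqrt(min_size * max_size), and one anchor for each remaining ratio. A minimal sanity check, assuming the module above is saved locally as generate_anchors.py (its package path is not visible in this diff):

import numpy as np
from generate_anchors import generate_anchors  # import path assumed

anchors = generate_anchors(min_sizes=[30], max_sizes=[60], ratios=[1, 0.5, 2])
# 1 (ratio 1) + 1 (sqrt(30 * 60)) + 2 (ratios 0.5 and 2) = 4 anchors
assert anchors.shape == (4, 4)
# Each row is an (x1, y1, x2, y2) box centered on (0, 0)
print(np.round(anchors, 1))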
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
#     <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import dragon.vm.torch as torch
import numpy as np

from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor


class HardMiningLayer(torch.nn.Module):
    def __init__(self):
        super(HardMiningLayer, self).__init__()

    def forward(self, conf_prob, match_labels, max_overlaps):
        # Confidence of each matched box
        conf_prob_wide = conf_prob.numpy(True)
        # Label of each matched box
        match_labels_wide = match_labels
        # Max overlaps between default boxes and gt boxes
        max_overlaps_wide = max_overlaps
        # label ``-1`` will be ignored
        labels_wide = -np.ones(match_labels_wide.shape, dtype=np.int64)
        for ix in range(match_labels_wide.shape[0]):
            match_labels = match_labels_wide[ix]
            max_overlaps = max_overlaps_wide[ix]
            conf_prob = conf_prob_wide[ix]
            conf_loss = np.zeros(match_labels.shape, dtype=np.float32)
            inds = np.where(match_labels >= 0)[0]
            flt_min = np.finfo(float).eps
            # Softmax cross-entropy
            conf_loss[inds] = -np.log(np.maximum(
                conf_prob[inds, match_labels[inds]], flt_min))
            # Filter negatives
            fg_inds = np.where(match_labels > 0)[0]
            neg_inds = np.where(match_labels == 0)[0]
            neg_overlaps = max_overlaps[neg_inds]
            eligible_neg_inds = np.where(neg_overlaps < cfg.SSD.OHEM.NEG_OVERLAP)[0]
            sel_inds = neg_inds[eligible_neg_inds]
            # Do Mining
            sel_loss = conf_loss[sel_inds]
            num_pos = len(fg_inds)
            num_sel = min(int(num_pos * cfg.SSD.OHEM.NEG_POS_RATIO), len(sel_inds))
            sorted_sel_inds = sel_inds[np.argsort(-sel_loss)]
            bg_inds = sorted_sel_inds[:num_sel]
            labels_wide[ix][fg_inds] = match_labels[fg_inds]  # Keep fg indices
            labels_wide[ix][bg_inds] = 0  # Use hard negatives as bg indices
        # Feed labels to compute cls loss
        return {'labels': blob_to_tensor(labels_wide)}
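The loop above implements standard SSD hard negative mining: every positive box keeps its label, negatives are ranked by their softmax loss, and only the hardest ones (up to NEG_POS_RATIO times the number of positives) are kept as background; everything else stays at -1 and is ignored by the classification loss. A toy numpy illustration of that selection rule (made-up values, a ratio of 1 just to make the effect visible, and the NEG_OVERLAP eligibility filter omitted):

import numpy as np

match_labels = np.array([1, 0, 0, 0, 2, 0])     # 2 positives, 4 negatives
conf_loss = np.array([0.2, 1.5, 0.1, 0.9, 0.3, 0.05])
neg_pos_ratio = 1                               # stand-in for cfg.SSD.OHEM.NEG_POS_RATIO

fg_inds = np.where(match_labels > 0)[0]
neg_inds = np.where(match_labels == 0)[0]
num_sel = min(int(len(fg_inds) * neg_pos_ratio), len(neg_inds))
bg_inds = neg_inds[np.argsort(-conf_loss[neg_inds])][:num_sel]

labels = -np.ones_like(match_labels)            # -1 -> ignored by the loss
labels[fg_inds] = match_labels[fg_inds]
labels[bg_inds] = 0
print(labels)  # [ 1  0 -1  0  2 -1]: the two easiest negatives are dropped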
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
#     <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import dragon.vm.torch as torch

from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.ssd.data.data_batch import DataBatch


class DataLayer(torch.nn.Module):
    def __init__(self):
        super(DataLayer, self).__init__()
        database = get_imdb(cfg.TRAIN.DATABASE)
        self.data_batch = DataBatch(**{
            'source': database.source,
            'classes': database.classes,
            'shuffle': cfg.TRAIN.USE_SHUFFLE,
            'num_chunks': 2048,  # Chunk-Wise Shuffle
            'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
        })

    def forward(self):
        # Get an array blob from the Queue
        outputs = self.data_batch.get()
        # Zero-Copy the array to tensor
        outputs['data'] = torch.from_numpy(outputs['data'])
        return outputs
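A hypothetical smoke test for this layer; the module path and the dataset path below are placeholders, and it assumes cfg.TRAIN.DATABASE resolves to a SeetaRecord dataset registered in lib.datasets.factory:

from lib.core.config import cfg
from lib.ssd.data_layer import DataLayer   # assumed module location

cfg.TRAIN.DATABASE = '/data/my_dataset'    # placeholder SeetaRecord dataset
layer = DataLayer()
outputs = layer()                          # pops one pre-fetched batch from the queue
print(type(outputs['data']))               # dragon.vm.torch tensor wrapping the image blob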
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
#     <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cv2
import dragon.vm.torch as torch
import numpy as np

from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image


def get_images(ims):
    target_h = cfg.SSD.RESIZE.HEIGHT
    target_w = cfg.SSD.RESIZE.WIDTH
    processed_ims, im_scales = [], []
    for im in ims:
        im_scales.append((float(target_h) / im.shape[0],
                          float(target_w) / im.shape[1]))
        processed_ims.append(cv2.resize(im, (target_w, target_h)))
    ims_blob = np.array(processed_ims, dtype=np.uint8)
    return ims_blob, im_scales


def ims_detect(detector, ims):
    """Detect images at a single scale."""
    # Prepare blobs
    data, im_scales = get_images(ims)
    data = torch.from_numpy(data).cuda(cfg.GPU_ID)
    # Do Forward
    with torch.no_grad():
        outputs = detector.forward(inputs={'data': data})
    # Decode results
    batch_boxes = []
    scores = tensor_to_blob(outputs['cls_prob'])
    prior_boxes = tensor_to_blob(outputs['prior_boxes'])
    box_deltas = tensor_to_blob(outputs['bbox_pred'])
    for i in range(box_deltas.shape[0]):
        boxes = bbox_transform_inv(
            boxes=prior_boxes,
            deltas=box_deltas[i],
            weights=cfg.BBOX_REG_WEIGHTS,
        )
        boxes[:, 0::2] /= im_scales[i][1]
        boxes[:, 1::2] /= im_scales[i][0]
        batch_boxes.append(clip_tiled_boxes(boxes, ims[i].shape))
    return scores, batch_boxes


def test_net(net, server):
    # Load settings
    classes = server.classes
    num_images = server.num_images
    num_classes = server.num_classes
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    _t = {'im_detect': Timer(), 'misc': Timer()}
    for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
        # Collect raw images and ground-truths
        image_ids, raw_images = [], []
        for item_idx in range(cfg.TEST.IMS_PER_BATCH):
            if batch_idx + item_idx >= num_images: continue
            image_id, raw_image = server.get_image()
            image_ids.append(image_id)
            raw_images.append(raw_image)
        _t['im_detect'].tic()
        batch_scores, batch_boxes = ims_detect(net, raw_images)
        _t['im_detect'].toc()
        _t['misc'].tic()
        for item_idx in range(len(batch_scores)):
            i = batch_idx + item_idx
            scores = batch_scores[item_idx]
            boxes = batch_boxes[item_idx]
            boxes_this_image = [[]]
            for j in range(1, num_classes):
                inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds]
                pre_nms_inds = np.argsort(-cls_scores)[:cfg.TEST.NMS_TOP_K]
                cls_scores = cls_scores[pre_nms_inds]
                cls_boxes = cls_boxes[pre_nms_inds]
                cls_detections = np.hstack(
                    (cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                if cfg.TEST.USE_SOFT_NMS:
                    keep = soft_nms(
                        cls_detections,
                        cfg.TEST.NMS,
                        method=cfg.TEST.SOFT_NMS_METHOD,
                        sigma=cfg.TEST.SOFT_NMS_SIGMA,
                    )
                else:
                    keep = nms(
                        cls_detections,
                        cfg.TEST.NMS,
                        force_cpu=True,
                    )
                cls_detections = cls_detections[keep, :]
                all_boxes[j][i] = cls_detections
                boxes_this_image.append(cls_detections)
            if cfg.VIS or cfg.VIS_ON_FILE:
                vis_one_image(
                    raw_images[item_idx],
                    classes,
                    boxes_this_image,
                    thresh=cfg.VIS_TH,
                    box_alpha=1.0,
                    show_class=True,
                    filename=server.get_save_filename(image_ids[item_idx]),
                )
            # Limit to max_per_image detections *over all classes*
            if cfg.TEST.DETECTIONS_PER_IM > 0:
                image_scores = []
                for j in range(1, num_classes):
                    if len(all_boxes[j][i]) < 1:
                        continue
                    image_scores.append(all_boxes[j][i][:, -1])
                if len(image_scores) > 0:
                    image_scores = np.hstack(image_scores)
                    if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
                        image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
                        for j in range(1, num_classes):
                            keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                            all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()
        print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(batch_idx + cfg.TEST.IMS_PER_BATCH,
                      num_images,
                      _t['im_detect'].average_time,
                      _t['misc'].average_time),
              end='')

    print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
    print('Evaluating detections')
    server.evaluate_detections(all_boxes)
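The DETECTIONS_PER_IM cap at the end of test_net pools the scores of all classes for one image, takes the score of the K-th best detection as a single threshold, and then re-filters every class with it. A small numpy illustration with made-up scores:

import numpy as np

per_class_scores = {1: np.array([0.9, 0.6]), 2: np.array([0.8, 0.3, 0.7])}
detections_per_im = 3  # stand-in for cfg.TEST.DETECTIONS_PER_IM

image_scores = np.hstack(list(per_class_scores.values()))
if len(image_scores) > detections_per_im:
    image_thresh = np.sort(image_scores)[-detections_per_im]
    per_class_scores = {j: s[s >= image_thresh]
                        for j, s in per_class_scores.items()}
print(per_class_scores)  # class 1 keeps 0.9, class 2 keeps 0.8 and 0.7 -> 3 in total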
@@ -19,7 +19,7 @@ sys.path.append('../../')
import cv2
import numpy as np

-from lib.ssd.data import transforms
+from lib.ssd import transforms

if __name__ == '__main__':
...
@@ -201,6 +201,16 @@ def expand_boxes(boxes, scale):
    return boxes_exp

+def flip_boxes(boxes, width):
+    """Flip the boxes horizontally."""
+    flip_boxes = boxes.copy()
+    old_x1 = boxes[:, 0].copy()
+    old_x2 = boxes[:, 2].copy()
+    flip_boxes[:, 0] = width - old_x2 - 1
+    flip_boxes[:, 2] = width - old_x1 - 1
+    return flip_boxes
+
def filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
...
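A quick check of the new flip_boxes helper; the import path below is inferred from the lib.utils.boxes imports elsewhere in this commit and may differ:

import numpy as np
from lib.utils.boxes import flip_boxes  # assumed location of the hunk above

boxes = np.array([[10., 20., 30., 40.]])  # one (x1, y1, x2, y2) box
print(flip_boxes(boxes, width=100))
# [[69. 20. 89. 40.]] -- x coordinates are mirrored with the inclusive-pixel
# convention: new_x1 = width - old_x2 - 1, new_x2 = width - old_x1 - 1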
@@ -62,22 +62,20 @@ if __name__ == '__main__':
    if checkpoint is not None:
        cfg.TRAIN.WEIGHTS = checkpoint

-    # Setup MPI
-    if cfg.NUM_GPUS != dragon.mpi.size():
-        raise ValueError(
-            'Excepted {} mpi nodes, but got {}.'
-            .format(len(args.gpus), dragon.mpi.size())
-        )
-    GPUs = [i for i in range(cfg.NUM_GPUS)]
-    cfg.GPU_ID = GPUs[dragon.mpi.rank()]
-    dragon.mpi.add_parallel_group([i for i in range(cfg.NUM_GPUS)])
-    dragon.mpi.set_parallel_mode('NCCL' if cfg.USE_NCCL else 'MPI')
-
-    # Setup logger
-    if dragon.mpi.rank() != 0:
-        logger.set_root_logger(False)
+    # Setup the distributed environment
+    world_rank = dragon.distributed.get_rank()
+    world_size = dragon.distributed.get_world_size()
+    if cfg.NUM_GPUS != world_size:
+        raise ValueError(
+            'Excepted staring of {} processes, got {}.'
+            .format(cfg.NUM_GPUS, world_size)
+        )
+    logger.set_root_logger(world_rank == 0)
+
+    # Select the GPU depending on the rank of process
+    cfg.GPU_ID = [i for i in range(cfg.NUM_GPUS)][world_rank]

-    # Fix the random seeds (numpy and dragon) for reproducibility
+    # Fix the random seed for reproducibility
    numpy.random.seed(cfg.RNG_SEED)
    dragon.config.set_random_seed(cfg.RNG_SEED)
...
@@ -89,7 +87,8 @@ if __name__ == '__main__':
    # Ready to train the network
    logger.info('Output will be saved to `{:s}`'
                .format(coordinator.checkpoints_dir()))
-    train_net(coordinator, start_iter)
-
-    # Finalize mpi
-    dragon.mpi.finalize()
+    with dragon.distributed.new_group(
+            ranks=[i for i in range(cfg.NUM_GPUS)],
+            backend='NCCL' if cfg.USE_NCCL else 'MPI',
+            verbose=True).as_default():
+        train_net(coordinator, start_iter)
@@ -82,7 +82,7 @@ if __name__ == '__main__':
    if checkpoint is not None:
        cfg.TRAIN.WEIGHTS = checkpoint

-    # Fix the random seeds (numpy and dragon) for reproducibility
+    # Fix the random seed for reproducibility
    numpy.random.seed(cfg.RNG_SEED)
    dragon.config.set_random_seed(cfg.RNG_SEED)
...