Commit 41b3932b by Ting PAN

Refactor the API of rotated boxes

1 parent c020594c
Showing with 1435 additions and 804 deletions
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.2.3 (20191101)
Dragon Minimum Required (Version 0.3.0.dev20191021)
Changes:
Preview Features:
- Refactor the API of rotated boxes.
- Simplify the solver by adding LRScheduler.
- Change the ``ITER`` naming to ``STEP`` (see the key mapping below).
Bugs fixed:
- None
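For reference, the ``ITER`` -> ``STEP`` renaming maps the old solver keys to the new ones as follows (distilled from the config diffs below):

  STEPS          -> DECAY_STEPS
  MAX_ITERS      -> MAX_STEPS
  SNAPSHOT_ITERS -> SNAPSHOT_EVERY
  WARM_UP_ITERS  -> WARM_UP_STEPS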
------------------------------------------------------------------------
SeetaDet 0.2.2 (20191021)
Dragon Minimum Required (Version 0.3.0.dev20191021)
Changes:
Preview Features:
- Add the dumping of detection results.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.2.1 (20191017)
Dragon Minimum Required (Version 0.3.0.dev20191017)
......
#!/bin/sh
# delete cache
# Delete cache
rm -r build install *.c *.cpp
# compile cython modules
# Compile cpp modules
python setup.py build_ext --inplace
g++ -o ../lib/utils/ctypes_rbox.so -shared -fPIC -O2 rbox.cc -fopenmp
g++ -o ../lib/utils/ctypes_rbox.so -shared -fPIC -O2 rbox.cc -std=c++11 -fopenmp
# compile cuda modules
# Compile cuda modules
cd build && cmake .. && make install && cd ..
# setup
# Copy to the library root
cp -r install/lib ../
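The compiled ctypes_rbox.so is consumed from Python via ctypes; a minimal loading sketch (the library path and the exported symbols are assumptions here, since rbox.cc is not shown in this diff):

import ctypes

# Load the rotated-box library built by the g++ command above.
# The C symbols it exports are defined in rbox.cc and are not shown here.
rbox = ctypes.CDLL('lib/utils/ctypes_rbox.so')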
......@@ -22,11 +22,9 @@ MODEL:
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [60000, 80000]
MAX_ITERS: 90000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [60000, 80000]
MAX_STEPS: 90000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_faster_rcnn
FRCNN:
ROI_XFORM_METHOD: RoIAlign
......
......@@ -22,11 +22,9 @@ MODEL:
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [120000, 160000]
MAX_ITERS: 180000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [120000, 160000]
MAX_STEPS: 180000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_faster_rcnn
FRCNN:
ROI_XFORM_METHOD: RoIAlign
......
......@@ -13,11 +13,9 @@ MODEL:
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.002
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [100000, 140000]
MAX_ITERS: 140000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [100000, 140000]
MAX_STEPS: 140000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_faster_rcnn
FRCNN:
ROI_XFORM_METHOD: RoIAlign
......
......@@ -14,10 +14,9 @@ MODEL:
SOLVER:
BASE_LR: 0.001
WEIGHT_DECAY: 0.0005
LR_POLICY: steps_with_decay
STEPS: [100000, 140000]
MAX_ITERS: 140000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [100000, 140000]
MAX_STEPS: 140000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_faster_rcnn
RPN:
STRIDES: [16]
......
......@@ -22,11 +22,9 @@ MODEL:
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [30000, 40000]
MAX_ITERS: 45000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [30000, 40000]
MAX_STEPS: 45000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
RPN_MIN_LEVEL: 3
......
......@@ -22,12 +22,10 @@ MODEL:
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
WARM_UP_ITERS: 2000 # default: 500
LR_POLICY: steps_with_decay
STEPS: [120000, 160000]
MAX_ITERS: 180000
SNAPSHOT_ITERS: 5000
WARM_UP_STEPS: 2000 # default: 500
DECAY_STEPS: [120000, 160000]
MAX_STEPS: 180000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
RPN_MIN_LEVEL: 3
......@@ -41,9 +39,9 @@ TRAIN:
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
SCALE_JITTERING: True
COLOR_JITTERING: True
SCALE_RANGE: [0.75, 1.33]
USE_SCALE_JITTER: True
USE_COLOR_JITTER: True
SCALE_JITTER_RANGE: [0.75, 1.33]
TEST:
DATABASE: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
......
......@@ -13,11 +13,9 @@ MODEL:
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [40000, 50000, 60000]
MAX_ITERS: 60000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [40000, 50000, 60000]
MAX_STEPS: 60000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
RPN_MIN_LEVEL: 3
......@@ -28,9 +26,9 @@ TRAIN:
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_RANGE: [0.5, 2.0]
SCALE_JITTERING: True
COLOR_JITTERING: True
SCALE_JITTER_RANGE: [0.5, 2.0]
USE_SCALE_JITTER: True
USE_COLOR_JITTER: True
TEST:
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -13,12 +13,10 @@ MODEL:
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [40000, 50000, 60000]
WARM_UP_ITERS: 2000
MAX_ITERS: 60000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [40000, 50000, 60000]
WARM_UP_STEPS: 2000
MAX_STEPS: 60000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
RPN_MIN_LEVEL: 3
......@@ -29,9 +27,9 @@ TRAIN:
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_RANGE: [0.5, 2.0]
SCALE_JITTERING: True
COLOR_JITTERING: True
SCALE_JITTER_RANGE: [0.5, 2.0]
USE_SCALE_JITTER: True
USE_COLOR_JITTER: True
TEST:
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -13,12 +13,10 @@ MODEL:
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [40000, 50000, 60000]
WARM_UP_ITERS: 2000
MAX_ITERS: 60000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [40000, 50000, 60000]
WARM_UP_STEPS: 2000
MAX_STEPS: 60000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
RPN_MIN_LEVEL: 3
......@@ -29,9 +27,9 @@ TRAIN:
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_RANGE: [0.5, 2.0]
SCALE_JITTERING: True
COLOR_JITTERING: True
SCALE_JITTER_RANGE: [0.5, 2.0]
USE_SCALE_JITTER: True
USE_COLOR_JITTER: True
TEST:
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -13,11 +13,9 @@ MODEL:
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.001
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [80000, 100000, 120000]
MAX_ITERS: 120000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [80000, 100000, 120000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_300
SSD:
RESIZE:
......
......@@ -13,13 +13,12 @@ MODEL:
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.002
BASE_LR: 0.001
WARM_UP_FACTOR: 0.
WEIGHT_DECAY: 0.0005
LR_POLICY: steps_with_decay
STEPS: [80000, 100000, 120000]
MAX_ITERS: 120000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [80000, 100000, 120000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_300
SSD:
RESIZE:
......
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: ssd
BACKBONE: resnet50.fpn
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 8
SOLVER:
BASE_LR: 0.001
DECAY_STEPS: [80000, 100000, 120000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_320
SSD:
NUM_CONVS: 2
RESIZE:
HEIGHT: 320
WIDTH: 320
MULTIBOX:
STRIDES: [8, 16, 32, 64, 100, 300]
MIN_SIZES: [30, 60, 110, 162, 213, 264]
MAX_SIZES: [60, 110, 162, 213, 264, 315]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5, 3, 0.33], [1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
TEST:
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
NMS_TOP_K: 400
NMS: 0.45
SCORE_THRESH: 0.01
DETECTIONS_PER_IM: 200
......@@ -20,10 +20,10 @@ from __future__ import print_function
import os.path as osp
import numpy as np
from lib.utils.attrdict import AttrDict as edict
from lib.utils.attrdict import AttrDict
__C = edict()
cfg = __C
cfg = __C = AttrDict()
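The implementation of AttrDict is not part of this diff; a minimal sketch of what it presumably provides (an assumption: a dict whose keys are also attributes, which is what allows ``cfg.TRAIN.WEIGHTS`` instead of ``cfg['TRAIN']['WEIGHTS']``):

class AttrDict(dict):
    """A dict with attribute-style access (sketch, not the project's code)."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value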
###########################################
......@@ -33,7 +33,7 @@ cfg = __C
###########################################
__C.TRAIN = edict()
__C.TRAIN = AttrDict()
# Initialize network with weights from this file
__C.TRAIN.WEIGHTS = ''
......@@ -82,17 +82,17 @@ __C.TRAIN.USE_DIFF = True
__C.TRAIN.BBOX_THRESH = 0.5
# If True, randomly scale the image by scale range
__C.TRAIN.SCALE_JITTERING = False
__C.TRAIN.SCALE_RANGE = [0.75, 1.0]
__C.TRAIN.USE_SCALE_JITTER = False
__C.TRAIN.SCALE_JITTER_RANGE = [0.75, 1.0]
# If True, randomly distort the image by brightness, contrast, and saturation
__C.TRAIN.COLOR_JITTERING = False
__C.TRAIN.USE_COLOR_JITTER = False
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor statisfied by positive and negative conditions set to negative
# If an anchor satisfies both the positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
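The two overlap thresholds above partition anchors into positives, negatives, and ignored ones; a minimal NumPy sketch of that labeling rule (function and argument names are illustrative, not the project's):

import numpy as np

def label_anchors(max_overlaps, pos_thresh=0.7, neg_thresh=0.3):
    """Label anchors as 1 (fg), 0 (bg), or -1 (ignored) by their best IoU."""
    labels = np.full(max_overlaps.shape, -1, dtype=np.int64)
    labels[max_overlaps < neg_thresh] = 0   # IoU < thresh: negative example
    labels[max_overlaps >= pos_thresh] = 1  # IoU >= thresh: positive example
    return labels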
......@@ -118,7 +118,7 @@ __C.TRAIN.RPN_STRADDLE_THRESH = 0
###########################################
__C.TEST = edict()
__C.TEST = AttrDict()
# Database to test
__C.TEST.DATABASE = ''
......@@ -151,10 +151,10 @@ __C.TEST.SOFT_NMS_SIGMA = 0.5
# The top-k prior boxes before nms.
__C.TEST.NMS_TOP_K = 400
# The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05
# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
# NMS threshold used on RPN proposals
......@@ -188,37 +188,32 @@ __C.TEST.DETECTIONS_PER_IM = 100
###########################################
__C.MODEL = edict()
__C.MODEL = AttrDict()
# The type of the model
# ('faster_rcnn',
# 'mask_rcnn',
# 'ssd',
# 'rssd',
# 'retinanet',
# )
__C.MODEL.TYPE = ''
# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE = 'FLOAT32'
__C.MODEL.PRECISION = 'FLOAT32'
# The backbone
__C.MODEL.BACKBONE = ''
# The number of classes in the dataset
__C.MODEL.NUM_CLASSES = -1
# Keep it for TaaS DataSet
# The name for each object class
__C.MODEL.CLASSES = ['__background__']
# Add StopGrad at a specified stage so the bottom layers are frozen
# Freeze the gradients from convolution stage K onwards
# The value of ``K`` is usually set to 2
__C.MODEL.FREEZE_AT = 2
# Whether to use focal loss for one-stage detectors?
# Enabled if model type in ('ssd',)
# RetinaNet is forced to use focal loss
__C.MODEL.USE_FOCAL_LOSS = False
# Setting of focal loss
__C.MODEL.FOCAL_LOSS_ALPHA = 0.25
__C.MODEL.FOCAL_LOSS_GAMMA = 2.0
......@@ -234,7 +229,7 @@ __C.MODEL.COARSEST_STRIDE = 32
###########################################
__C.RPN = edict()
__C.RPN = AttrDict()
# Strides for multiple rpn heads
__C.RPN.STRIDES = [4, 8, 16, 32, 64]
......@@ -253,7 +248,7 @@ __C.RPN.ASPECT_RATIOS = [0.5, 1, 2]
###########################################
__C.RETINANET = edict()
__C.RETINANET = AttrDict()
# Anchor aspect ratios to use
__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)
......@@ -291,7 +286,7 @@ __C.RETINANET.NEGATIVE_OVERLAP = 0.4
###########################################
__C.FPN = edict()
__C.FPN = AttrDict()
# Channel dimension of the FPN feature levels
__C.FPN.DIM = 256
......@@ -317,7 +312,7 @@ __C.FPN.ROI_MIN_LEVEL = 2
###########################################
__C.FRCNN = edict()
__C.FRCNN = AttrDict()
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.FRCNN.ROI_XFORM_METHOD = 'RoIPool'
......@@ -338,7 +333,7 @@ __C.FRCNN.ROI_XFORM_RESOLUTION = 7
###########################################
__C.MRCNN = edict()
__C.MRCNN = AttrDict()
# Resolution of mask predictions
__C.MRCNN.RESOLUTION = 28
......@@ -357,10 +352,7 @@ __C.MRCNN.ROI_XFORM_RESOLUTION = 14
###########################################
__C.SSD = edict()
# Whether to enable FPN enhancement?
__C.SSD.FPN_ON = False
__C.SSD = AttrDict()
# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
......@@ -369,7 +361,7 @@ __C.SSD.NUM_CONVS = 0
# Weight for bbox regression loss
__C.SSD.BBOX_REG_WEIGHT = 1.
__C.SSD.MULTIBOX = edict()
__C.SSD.MULTIBOX = AttrDict()
# MultiBox configs
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
......@@ -377,25 +369,25 @@ __C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []
__C.SSD.OHEM = edict()
__C.SSD.OHEM = AttrDict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
# The ratio used in hard example mining
__C.SSD.OHEM.NEG_POS_RATIO = 3.0
# Distort the image?
__C.SSD.DISTORT = edict()
__C.SSD.DISTORT = AttrDict()
__C.SSD.DISTORT.BRIGHTNESS_PROB = 0.5
__C.SSD.DISTORT.CONTRAST_PROB = 0.5
__C.SSD.DISTORT.SATURATION_PROB = 0.5
# Expand the image?
__C.SSD.EXPAND = edict()
__C.SSD.EXPAND = AttrDict()
__C.SSD.EXPAND.PROB = 0.5
__C.SSD.EXPAND.MAX_RATIO = 4.0
# Resize the image?
__C.SSD.RESIZE = edict()
__C.SSD.RESIZE = AttrDict()
__C.SSD.RESIZE.HEIGHT = 300
__C.SSD.RESIZE.WIDTH = 300
__C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
......@@ -403,7 +395,7 @@ __C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
# Samplers
# Format as (min_scale, max_scale,
# min_aspect_ratio, max_aspect_ratio,
# min_jaccard_overlap, max_jaccard_overlap,
# min_overlap, max_overlap,
# max_trials, max_sample)
__C.SSD.SAMPLERS = [
(1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1, 1), # Entire image
......@@ -423,7 +415,7 @@ __C.SSD.SAMPLERS = [
###########################################
__C.RESNET = edict()
__C.RESNET = AttrDict()
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
__C.RESNET.NUM_GROUPS = 1
......@@ -439,7 +431,7 @@ __C.RESNET.GROUP_WIDTH = 64
###########################################
__C.DROPBLOCK = edict()
__C.DROPBLOCK = AttrDict()
# Whether to use drop block for more regularization
__C.DROPBLOCK.DROP_ON = False
......@@ -455,59 +447,46 @@ __C.DROPBLOCK.DECREMENT = 1e-6
###########################################
__C.SOLVER = edict()
__C.SOLVER = AttrDict()
# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001
# The interval to display logs
__C.SOLVER.DISPLAY = 20
# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_EVERY = 5000
# Prefix to yield the path: <prefix>_iters_XYZ.pth
__C.SOLVER.SNAPSHOT_PREFIX = ''
# Optional scaling factor for total loss
# This option is helpful to scale the magnitude
# of gradients during FP16 training
__C.SOLVER.LOSS_SCALING = 1.
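Why loss scaling helps FP16 training, shown with NumPy (illustrative only; LOSS_SCALING defaults to 1, and the solver divides gradients back by the same factor via its ``scale_gradient`` argument):

import numpy as np

scale = 1024.
tiny_grad = 1e-8                          # a gradient magnitude FP16 cannot hold
print(np.float16(tiny_grad))              # 0.0 -- underflows, the update is lost
scaled = np.float16(tiny_grad * scale)    # ~1.02e-05 -- representable in FP16
print(np.float32(scaled) / scale)         # ~1e-08 -- recovered for the FP32 update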
# Schedule type (see functions in utils.lr_policy for options)
# E.g., 'step', 'steps_with_decay', ...
__C.SOLVER.LR_POLICY = 'steps_with_decay'
# Hyperparameter used by the specified policy
# For 'step', the current LR is multiplied by SOLVER.GAMMA at each step
__C.SOLVER.GAMMA = 0.1
# Uniform step size for 'steps' policy
__C.SOLVER.STEP_SIZE = 30000
__C.SOLVER.STEPS = []
# Maximum number of SGD iterations
__C.SOLVER.MAX_ITERS = 40000
__C.SOLVER.MAX_STEPS = 40000
# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001
# The uniform interval for LRScheduler
__C.SOLVER.DECAY_STEP = 1
# The custom intervals for LRScheduler
__C.SOLVER.DECAY_STEPS = []
# The decay factor for exponential LRScheduler
__C.SOLVER.DECAY_GAMMA = 0.1
# Warm up to ``BASE_LR`` over this number of steps
__C.SOLVER.WARM_UP_STEPS = 500
# Start the warm up from ``BASE_LR`` * ``FACTOR``
__C.SOLVER.WARM_UP_FACTOR = 0.333
# The type of LRScheduler
__C.SOLVER.LR_POLICY = 'steps_with_decay'
# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9
# L2 regularization hyper parameters
__C.SOLVER.WEIGHT_DECAY = 0.0005
__C.SOLVER.WEIGHT_DECAY = 0.0001
# L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = -1.0
# Warm up to SOLVER.BASE_LR over this number of SGD iterations
__C.SOLVER.WARM_UP_ITERS = 500
# Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR
__C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0
# The steps for accumulating gradients
__C.SOLVER.ITER_SIZE = 1
# The interval to display logs
__C.SOLVER.DISPLAY = 20
# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_ITERS = 5000
# prefix to yield the path: <prefix>_iters_XYZ.caffemodel
__C.SOLVER.SNAPSHOT_PREFIX = ''
###########################################
# #
......@@ -532,9 +511,6 @@ __C.PIXEL_MEANS = [102., 115., 122.]
# These are empirically chosen to approximately lead to unit variance targets
__C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
# Default weights on (dx, dy, dw, dh, da) for normalizing rbox regression targets
__C.RBOX_REG_WEIGHTS = (10.0, 10.0, 5., 5., 10.)
# Prior prob for the positives at the beginning of training.
# This is used to set the bias init for the logits layer
__C.PRIOR_PROB = 0.01
......@@ -581,7 +557,7 @@ def _merge_a_into_b(a, b):
# the types must match, too
v = _check_and_coerce_cfg_value_type(v, b[k], k)
# recursively merge dicts
if type(v) is edict:
if type(v) is AttrDict:
try:
_merge_a_into_b(a[k], b[k])
except:
......@@ -595,7 +571,7 @@ def cfg_from_file(filename):
"""Load a config file and merge it into the default options."""
import yaml
with open(filename, 'r') as f:
yaml_cfg = edict(yaml.load(f))
yaml_cfg = AttrDict(yaml.load(f))
global __C
_merge_a_into_b(yaml_cfg, __C)
......@@ -643,8 +619,8 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key):
value_a = list(value_a)
elif isinstance(value_a, list) and isinstance(value_b, tuple):
value_a = tuple(value_a)
elif isinstance(value_a, dict) and isinstance(value_b, edict):
value_a = edict(value_a)
elif isinstance(value_a, dict) and isinstance(value_b, AttrDict):
value_a = AttrDict(value_a)
else:
raise ValueError(
'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
......
......@@ -23,10 +23,8 @@ from lib.core.config import cfg_from_file
class Coordinator(object):
"""Coordinator is a simple tool to manage the
unique experiments from the YAML configurations.
"""Manage the unique experiments."""
"""
def __init__(self, cfg_file, exp_dir=None):
# Override the default configs
cfg_from_file(cfg_file)
......@@ -44,9 +42,14 @@ class Coordinator(object):
self.experiment_dir = exp_dir
def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path):
os.makedirs(path)
try:
path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path):
os.makedirs(path)
except OSError:
path = os.path.abspath(os.path.join('/tmp', file))
if auto_create and not os.path.exists(path):
os.makedirs(path)
return path
def checkpoints_dir(self):
......@@ -55,7 +58,9 @@ class Coordinator(object):
def exports_dir(self):
return self._path_at('exports')
def results_dir(self, checkpoint=None):
def results_dir(self, checkpoint=None, output_dir=None):
if output_dir is not None:
return output_dir
sub_dir = os.path.splitext(os.path.basename(checkpoint))[0] if checkpoint else ''
return self._path_at(os.path.join('results', sub_dir))
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.detector import Detector
from lib.utils import logger
class Solver(object):
def __init__(self):
# Define the generic detector
self.detector = Detector()
# Define the optimizer and its arguments
self.optimizer = None
self.opt_arguments = {
'scale_gradient': 1. / (
cfg.SOLVER.LOSS_SCALING *
cfg.SOLVER.ITER_SIZE
),
'clip_gradient': float(cfg.SOLVER.CLIP_NORM),
'weight_decay': cfg.SOLVER.WEIGHT_DECAY,
}
# Define the global step
self.iter = 0
# Define the decay step
self._current_step = 0
def _get_param_groups(self):
param_groups = [
{
'params': [],
'lr_mult': 1.,
'decay_mult': 1.,
},
# Special treatment for biases (mainly to match historical impl.
# details):
# (1) Do not apply weight decay
# (2) Use a 2x higher learning rate
{
'params': [],
'lr_mult': 2.,
'decay_mult': 0.,
}
]
for name, param in self.detector.named_parameters():
if 'bias' in name:
param_groups[1]['params'].append(param)
else:
param_groups[0]['params'].append(param)
return param_groups
def set_learning_rate(self):
policy = cfg.SOLVER.LR_POLICY
if policy == 'steps_with_decay':
if self._current_step < len(cfg.SOLVER.STEPS) \
and self.iter >= cfg.SOLVER.STEPS[self._current_step]:
self._current_step = self._current_step + 1
logger.info(
'MultiStep Status: Iteration {}, step = {}'
.format(self.iter, self._current_step)
)
new_lr = cfg.SOLVER.BASE_LR * (
cfg.SOLVER.GAMMA ** self._current_step)
self.optimizer.param_groups[0]['lr'] = \
self.optimizer.param_groups[1]['lr'] = new_lr
else:
raise ValueError('Unknown lr policy: ' + policy)
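Concretely, with the COCO Faster R-CNN schedule above (BASE_LR: 0.02, GAMMA: 0.1, STEPS: [60000, 80000]), set_learning_rate yields:

  iteration     0 - 59999: lr = 0.02 * 0.1^0 = 0.02
  iteration 60000 - 79999: lr = 0.02 * 0.1^1 = 0.002
  iteration 80000 - 89999: lr = 0.02 * 0.1^2 = 0.0002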
def one_step(self):
def add_loss(x, y):
return y if x is None else x + y
# Forward & Backward & Compute_loss
iter_size = cfg.SOLVER.ITER_SIZE
loss_scaling = cfg.SOLVER.LOSS_SCALING
stats = {'loss': {'total': 0.}, 'iter': self.iter}
run_time, tic = 0., time.time()
if iter_size > 1:
# Dragon is designed for manual gradients accumulating
# ``zero_grad`` is only required if calling ``accumulate_grad``
self.optimizer.zero_grad()
for i in range(iter_size):
outputs, total_loss = self.detector(), None
# Sum the partial losses
for k, v in outputs.items():
if 'loss' in k:
if k not in stats['loss']:
stats['loss'][k] = 0.
total_loss = add_loss(total_loss, v)
stats['loss'][k] += float(v) * loss_scaling
if loss_scaling != 1.:
total_loss *= loss_scaling
stats['loss']['total'] += float(total_loss)
total_loss.backward()
if iter_size > 1:
self.optimizer.accumulate_grad()
run_time += (time.time() - tic)
# Apply Update
self.set_learning_rate()
tic = time.time()
self.optimizer.step()
run_time += (time.time() - tic)
self.iter += 1
# Average loss by the iter size
for k in stats['loss'].keys():
stats['loss'][k] /= cfg.SOLVER.ITER_SIZE
# Misc stats
stats['lr'] = self.base_lr
stats['time'] = run_time
return stats
@property
def base_lr(self):
return self.optimizer.param_groups[0]['lr']
@base_lr.setter
def base_lr(self, value):
self.optimizer.param_groups[0]['lr'] = \
self.optimizer.param_groups[1]['lr'] = value
class SGDSolver(Solver):
def __init__(self):
super(SGDSolver, self).__init__()
self.opt_arguments.update(**{
'lr': cfg.SOLVER.BASE_LR,
'momentum': cfg.SOLVER.MOMENTUM,
})
self.optimizer = torch.optim.SGD(
self._get_param_groups(), **self.opt_arguments)
class NesterovSolver(Solver):
def __init__(self):
super(NesterovSolver, self).__init__()
self.opt_arguments.update(**{
'lr': cfg.SOLVER.BASE_LR,
'momentum': cfg.SOLVER.MOMENTUM,
'nesterov': True,
})
self.optimizer = torch.optim.SGD(
self._get_param_groups(), **self.opt_arguments)
class RMSPropSolver(Solver):
def __init__(self):
super(RMSPropSolver, self).__init__()
self.opt_arguments.update(**{
'lr': cfg.SOLVER.BASE_LR,
'alpha': 0.9,
'eps': 1e-5,
})
self.optimizer = torch.optim.RMSprop(
self._get_param_groups(), **self.opt_arguments)
class AdamSolver(Solver):
def __init__(self):
super(AdamSolver, self).__init__()
self.opt_arguments.update(**{
'lr': cfg.SOLVER.BASE_LR,
'beta1': 0.9,
'beta2': 0.999,
'eps': 1e-5,
})
self.optimizer = torch.optim.Adam(
self._get_param_groups(), **self.opt_arguments)
def get_solver_func(type):
if type == 'MomentumSGD':
return SGDSolver
elif type == 'Nesterov':
return NesterovSolver
elif type == 'RMSProp':
return RMSPropSolver
elif type == 'Adam':
return AdamSolver
else:
raise ValueError(
'Unsupported solver type: {}.\n'
'Expected one of (MomentumSGD, Nesterov, RMSProp, Adam).'
.format(type)
)
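A minimal usage sketch, mirroring how the old train.py (below) instantiates a solver:

solver = get_solver_func('MomentumSGD')()   # -> SGDSolver
stats = solver.one_step()                   # one forward/backward/update pass
print(stats['iter'], stats['lr'], stats['loss']['total'])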
......@@ -34,7 +34,7 @@ class TestServer(object):
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_transformer = DataTransformer()
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
self.output_dir = output_dir
......@@ -70,11 +70,14 @@ class TestServer(object):
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
if cfg.TEST.PROTOCOL == 'null':
self.imdb.dump_detections(all_boxes, self.output_dir)
else:
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
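With this change, setting the test protocol to 'null' switches the server from evaluating detections to dumping them; a minimal YAML fragment (the database path is illustrative):

TEST:
  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'null'   # dump detections instead of evaluating them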
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
......
......@@ -18,53 +18,48 @@ from __future__ import division
from __future__ import print_function
import collections
import datetime
import os
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.core.solver import get_solver_func
from lib.solver.sgd import SGDSolver
from lib.utils import logger
from lib.utils import time_util
from lib.utils.stats import SmoothedValue
from lib.utils.timer import Timer
class SolverWrapper(object):
def __init__(self, coordinator):
self.solver = SGDSolver()
self.detector = self.solver.detector
self.output_dir = coordinator.checkpoints_dir()
self.solver = get_solver_func('MomentumSGD')()
# Load the pre-trained weights
init_weights = cfg.TRAIN.WEIGHTS
if init_weights != '':
if os.path.exists(init_weights):
logger.info('Loading weights from {}.'.format(init_weights))
self.solver.detector.load_weights(init_weights)
else:
raise ValueError('Invalid path of weights: {}'.format(init_weights))
# Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16':
self.solver.detector.half() # Powerful FP16 Support
self.solver.detector.cuda(cfg.GPU_ID)
# Setup the detector
self.detector.load_weights(cfg.TRAIN.WEIGHTS)
if cfg.MODEL.PRECISION.lower() == 'float16':
# Mixed precision training
self.detector.half()
self.detector.cuda(cfg.GPU_ID)
# Plan the metrics
self.board = None
self.metrics = collections.OrderedDict()
if cfg.ENABLE_TENSOR_BOARD and logger.is_root():
from dragon.tools.tensorboard import TensorBoard
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs')
try:
from dragon.tools.tensorboard import TensorBoard
log_dir = coordinator.experiment_dir + '/logs'
self.board = TensorBoard(log_dir=log_dir)
except ImportError:
pass
def snapshot(self):
if not logger.is_root():
return None
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth')
filename = cfg.SOLVER.SNAPSHOT_PREFIX + \
'_iter_{}.pth'.format(self.solver.iter)
filename = os.path.join(self.output_dir, filename)
torch.save(self.solver.detector.state_dict(), filename)
logger.info('Wrote snapshot to: {:s}'.format(filename))
return filename
if logger.is_root() and not os.path.exists(filename):
torch.save(self.detector.state_dict(), filename)
logger.info('Wrote snapshot to: {:s}'.format(filename))
def add_metrics(self, stats):
for k, v in stats['loss'].items():
......@@ -73,7 +68,7 @@ class SolverWrapper(object):
self.metrics[k].AddValue(v)
def send_metrics(self, stats):
if hasattr(self, 'board'):
if self.board is not None:
self.board.scalar_summary('lr', stats['lr'], stats['iter'])
self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items():
......@@ -90,10 +85,12 @@ class SolverWrapper(object):
stats['iter'],
)
def step(self, display=False):
def step(self):
display = self.solver.iter % cfg.SOLVER.DISPLAY == 0
stats = self.solver.one_step()
self.add_metrics(stats)
self.send_metrics(stats)
if display:
logger.info(
'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (
......@@ -110,43 +107,28 @@ class SolverWrapper(object):
def train_model(self):
"""Network training loop."""
last_snapshot_iter = -1
timer = Timer()
model_paths = []
start_lr = self.solver.base_lr
while self.solver.iter < cfg.SOLVER.MAX_ITERS:
if self.solver.iter < cfg.SOLVER.WARM_UP_ITERS:
alpha = (self.solver.iter + 1.0) / cfg.SOLVER.WARM_UP_ITERS
self.solver.base_lr = \
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
timer = time_util.Timer()
max_steps = cfg.SOLVER.MAX_STEPS
while self.solver.iter < max_steps:
# Apply 1-step SGD update
with timer.tic_and_toc():
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0)
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time
eta_seconds = average_time * (
cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
_, global_step = self.step(), self.solver.iter
if global_step % (10 * cfg.SOLVER.DISPLAY) == 0:
logger.info(
'< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta)
time_util.get_progress_info(
timer, global_step, max_steps
)
)
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0:
last_snapshot_iter = self.solver.iter
model_paths.append(self.snapshot())
if last_snapshot_iter != self.solver.iter:
model_paths.append(self.snapshot())
return model_paths
if global_step % cfg.SOLVER.SNAPSHOT_EVERY == 0:
self.snapshot()
def train_net(coordinator, start_iter=0):
sw = SolverWrapper(coordinator)
sw.solver.iter = start_iter
logger.info('Solving...')
model_paths = sw.train_model()
return model_paths
sw.train_model()
sw.snapshot()
......@@ -14,6 +14,7 @@
# ------------------------------------------------------------
import os
import shutil
import dragon
from lib.core.config import cfg
......@@ -59,6 +60,35 @@ class imdb(object):
def num_images(self):
return dragon.io.SeetaRecordDataset(self.source).size
def dump_detections(self, all_boxes, output_dir):
dataset = dragon.io.SeetaRecordDataset(self.source)
for file in ('data.data', 'data.index', 'data.meta'):
file = os.path.join(output_dir, file)
if os.path.exists(file):
os.remove(file)
writer = dragon.io.SeetaRecordWriter(output_dir, dataset.protocol)
for i in range(len(dataset)):
example = dataset.get()
example['object'] = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][i]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
example['object'].append({
'name': cls,
'xmin': float(detections[k][0]),
'ymin': float(detections[k][1]),
'xmax': float(detections[k][2]),
'ymax': float(detections[k][3]),
'difficult': 0,
})
writer.write(example)
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
pass
......
......@@ -109,36 +109,6 @@ class TaaS(imdb):
# #
##############################################
def _write_xml_bbox_results(self, all_boxes, gt_recs, output_dir):
from xml.dom import minidom
import xml.etree.ElementTree as ET
ix = 0
for image_id, rec in gt_recs.items():
root = ET.Element('annotation')
ET.SubElement(root, 'filename').text = str(image_id)
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][ix]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
object = ET.SubElement(root, 'object')
ET.SubElement(object, 'name').text = cls
ET.SubElement(object, 'difficult').text = '0'
bnd_box = ET.SubElement(object, 'bndbox')
ET.SubElement(bnd_box, 'xmin').text = str(detections[k][0])
ET.SubElement(bnd_box, 'ymin').text = str(detections[k][1])
ET.SubElement(bnd_box, 'xmax').text = str(detections[k][2])
ET.SubElement(bnd_box, 'ymax').text = str(detections[k][3])
ix += 1
rawText = ET.tostring(root)
dom = minidom.parseString(rawText)
with open('{}/{}.xml'.format(output_dir, image_id), 'w') as f:
dom.writexml(f, "", "\t", "\n", "utf-8")
def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
......@@ -486,10 +456,6 @@ class TaaS(imdb):
self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol)
elif 'xml' in protocol:
if cfg.EXP_DIR != '':
output_dir = cfg.EXP_DIR
self._write_xml_bbox_results(all_boxes, gt_recs, output_dir)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
......
......@@ -20,7 +20,7 @@ import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
......@@ -194,8 +194,8 @@ class AnchorTargetLayer(torch.nn.Module):
.transpose(0, 3, 1, 2)
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
......@@ -92,7 +92,7 @@ class DataBatch(mp.Process):
if self._num_transformers == -1:
self._num_transformers = 2
# Add 1 transformer for color augmentation
if cfg.TRAIN.COLOR_JITTERING:
if cfg.TRAIN.USE_COLOR_JITTER:
self._num_transformers += 1
self._num_transformers = min(
self._num_transformers, self._max_transformers)
......
......@@ -19,8 +19,10 @@ import cv2
import numpy as np
from lib.core.config import cfg
from lib.utils import rotated_boxes
from lib.utils.blob import prep_im_for_blob
from lib.utils.boxes import flip_boxes
from lib.utils.image import get_image_with_target_size
class DataTransformer(multiprocessing.Process):
......@@ -101,23 +103,29 @@ class DataTransformer(multiprocessing.Process):
def get_annotations(cls, example):
objects = []
for ix, obj in enumerate(example['object']):
if 'xmin' in obj:
objects.append({
'name': obj['name'],
'difficult': obj.get('difficult', 0),
'bbox': [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']],
})
if 'x3' in obj:
bbox = rotated_boxes.vertices2box(
[obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
obj['x4'], obj['y4']]
)
elif 'x2' in obj:
bbox = [obj['x1'], obj['y1'], obj['x2'], obj['y2']]
elif 'xmin' in obj:
bbox = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']]
else:
objects.append({
'name': obj['name'],
'difficult': obj.get('difficult', 0),
'bbox': obj['bbox'],
})
bbox = obj['bbox']
objects.append({
'name': obj['name'],
'difficult': obj.get('difficult', 0),
'bbox': bbox,
})
return example['id'], objects
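get_annotations now accepts three bbox encodings; illustrative example objects (all field values made up):

objects = [
    # 4-point polygon -> converted to a rotated box via vertices2box
    {'name': 'plane', 'x1': 0., 'y1': 0., 'x2': 40., 'y2': 10.,
     'x3': 36., 'y3': 26., 'x4': -4., 'y4': 16.},
    # 2-point encoding (x1, y1, x2, y2)
    {'name': 'car', 'x1': 10., 'y1': 20., 'x2': 50., 'y2': 60.},
    # classic VOC-style encoding
    {'name': 'dog', 'xmin': 5., 'ymin': 5., 'xmax': 30., 'ymax': 40., 'difficult': 0},
]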
def get(self, example):
img = np.frombuffer(example['content'], np.uint8)
img = cv2.imdecode(img, -1)
img = cv2.imdecode(img, 1)
# Scale
scale_indices = np.random.randint(len(cfg.TRAIN.SCALES))
......@@ -137,10 +145,10 @@ class DataTransformer(multiprocessing.Process):
if jitter != 1.0:
# To a rectangle (scale, max_size)
target_size = (np.array(im.shape[0:2]) / jitter).astype(np.int)
im, offsets = _get_image_with_target_size(target_size, im)
im, offsets = get_image_with_target_size(target_size, im)
else:
# To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im)
im, offsets = get_image_with_target_size([target_size] * 2, im)
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets)
......@@ -166,29 +174,3 @@ class DataTransformer(multiprocessing.Process):
self.q1_out.put(outputs)
else:
self.q2_out.put(outputs)
def _get_image_with_target_size(target_size, img):
im_shape = list(img.shape)
height_diff = target_size[0] - im_shape[0]
width_diff = target_size[1] - im_shape[1]
ofs_crop_width = np.random.randint(max(-width_diff, 0) + 1)
ofs_pad_width = np.random.randint(max(width_diff, 0) + 1)
ofs_crop_height = np.random.randint(max(-height_diff, 0) + 1)
ofs_pad_height = np.random.randint(max(height_diff, 0) + 1)
im_shape[:2] = target_size
new_img = np.empty(im_shape, dtype=img.dtype)
new_img[:] = cfg.PIXEL_MEANS
new_img[ofs_pad_height:ofs_pad_height + img.shape[0],
ofs_pad_width:ofs_pad_width + img.shape[1]] = \
img[ofs_crop_height:ofs_crop_height + target_size[0],
ofs_crop_width:ofs_crop_width + target_size[1]]
return new_img, (
ofs_pad_width - ofs_crop_width,
ofs_pad_height - ofs_crop_height,
target_size,
)
......@@ -18,19 +18,15 @@ import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms
from lib.utils.blob import blob_to_tensor
from lib.nms import nms_wrapper
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
"""Compute proposals by applying transformations to anchors."""
def __init__(self):
super(ProposalLayer, self).__init__()
......@@ -48,8 +44,8 @@ class ProposalLayer(torch.nn.Module):
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
......@@ -86,14 +82,15 @@ class ProposalLayer(torch.nn.Module):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
......@@ -111,11 +108,11 @@ class ProposalLayer(torch.nn.Module):
scores = scores[keep]
# 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_topN (e.g. 300)
# 7. Take after_nms_top_n (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Output rois blob
......@@ -129,4 +126,4 @@ class ProposalLayer(torch.nn.Module):
if cfg_key == 'TRAIN':
return rpn_rois
else:
return [blob_to_tensor(rpn_rois)]
return [array2tensor(rpn_rois)]
......@@ -18,7 +18,7 @@ import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
......@@ -73,11 +73,11 @@ class ProposalTargetLayer(torch.nn.Module):
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return {
'rois': [blob_to_tensor(batch_outputs['rois'])],
'labels': blob_to_tensor(batch_outputs['labels']),
'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']),
'rois': [array2tensor(batch_outputs['rois'])],
'labels': array2tensor(batch_outputs['labels']),
'bbox_targets': array2tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': array2tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': array2tensor(batch_outputs['bbox_outside_weights']),
}
......
......@@ -17,14 +17,13 @@ import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.nms import nms_wrapper
from lib.utils import framework
from lib.utils import time_util
from lib.utils.blob import im_list_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image
from lib.utils.timer import Timer
from lib.utils.graph import FrozenGraph
from lib.utils.vis import vis_one_image
......@@ -48,13 +47,14 @@ def im_detect(detector, raw_image):
with torch.no_grad():
with torch.jit.Recorder(retain_ops=True):
outputs = detector.forward(inputs)
detector.frozen_graph = FrozenGraph(
{'data': inputs['data'],
'ims_info': inputs['ims_info']},
{'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']},
)
detector.frozen_graph = \
framework.FrozenGraph(
{'data': inputs['data'],
'ims_info': inputs['ims_info']},
{'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']},
)
outputs = detector.frozen_graph(**blobs)
# Decode results
......@@ -88,14 +88,13 @@ def test_net(detector, server):
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
_t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()}
for i in range(num_images):
image_id, raw_image = server.get_image()
_t['im_detect'].tic()
scores, boxes = im_detect(detector, raw_image)
_t['im_detect'].toc()
with _t['im_detect'].tic_and_toc():
scores, boxes = im_detect(detector, raw_image)
_t['misc'].tic()
boxes_this_image = [[]]
......@@ -107,21 +106,30 @@ def test_net(detector, server):
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
cls_detections, cfg.TEST.NMS,
keep = nms_wrapper.soft_nms(
cls_detections,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(cls_detections, cfg.TEST.NMS, force_cpu=True)
keep = nms_wrapper.nms(
cls_detections,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_image, classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
raw_image,
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_id),
)
......@@ -129,7 +137,8 @@ def test_net(detector, server):
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
......
......@@ -14,6 +14,7 @@ from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
......@@ -21,7 +22,7 @@ import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
......@@ -180,8 +181,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
......@@ -19,20 +19,16 @@ import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms
from lib.nms import nms_wrapper
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
"""Compute proposals by applying transformations anchors."""
def __init__(self):
super(ProposalLayer, self).__init__()
......@@ -86,8 +82,8 @@ class ProposalLayer(torch.nn.Module):
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
......@@ -110,14 +106,15 @@ class ProposalLayer(torch.nn.Module):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4]
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
......@@ -136,9 +133,9 @@ class ProposalLayer(torch.nn.Module):
# 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Output rois blob
......@@ -156,16 +153,16 @@ class ProposalLayer(torch.nn.Module):
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1
k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level)
all_rois = []
for i in range(K):
for i in range(k):
lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling
all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
all_rois.append(array2tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else:
all_rois.append(blob_to_tensor(rpn_rois[lv_indices]))
all_rois.append(array2tensor(rpn_rois[lv_indices]))
return all_rois
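_map_rois_to_fpn_levels is not shown in this hunk; a sketch of the standard FPN heuristic it presumably implements (an assumption, following the FPN paper; boxes here are plain (x1, y1, x2, y2) rows):

import numpy as np

def map_rois_to_fpn_levels(rois, k_min, k_max, s0=224., lvl0=4):
    """Assign each RoI to level floor(lvl0 + log2(sqrt(area) / s0))."""
    ws = rois[:, 2] - rois[:, 0] + 1
    hs = rois[:, 3] - rois[:, 1] + 1
    levels = np.floor(lvl0 + np.log2(np.sqrt(ws * hs) / s0 + 1e-6))
    return np.clip(levels, k_min, k_max).astype(np.int64)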
......
......@@ -13,12 +13,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
......@@ -87,9 +87,9 @@ class ProposalTargetLayer(torch.nn.Module):
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1
k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(k)]
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
......@@ -99,11 +99,11 @@ class ProposalTargetLayer(torch.nn.Module):
)
return {
'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
'rois': [array2tensor(outputs['rois'][i]) for i in range(k)],
'labels': array2tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': array2tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': array2tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': array2tensor(np.vstack(outputs['bbox_outside_weights'])),
}
......
......@@ -29,7 +29,7 @@ from lib.utils.logger import is_root
class Detector(torch.nn.Module):
"""The "Detector" organizes the detection pipelines.
"""Organize the detection pipelines.
A bunch of classic algorithms are integrated, see the
``lib.core.config`` for their hyper-parameters.
......@@ -112,9 +112,10 @@ class Detector(torch.nn.Module):
# 1. Extract features
# Process the data:
# 1) NHWC => NCHW
# 2) uint8 => float32 or float16
# 3) Mean subtraction
# 0) CPU => CUDA
# 1) NHWC => NCHW
# 2) uint8 => float32 or float16
# 3) Mean subtraction
image_data = self.bootstrap(inputs['data'])
features = self.body(image_data)
......
......@@ -30,17 +30,18 @@ class FPN(torch.nn.Module):
super(FPN, self).__init__()
self.C = torch.nn.ModuleList()
self.P = torch.nn.ModuleList()
self.apply_func = self.apply_on_rcnn
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True))
self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True))
if 'retinanet' in cfg.MODEL.TYPE or 'ssd' in cfg.MODEL.TYPE:
if 'rcnn' in cfg.MODEL.TYPE:
self.apply_func = self.apply_on_rcnn
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
else:
self.apply_func = self.apply_on_generic
self.relu = torch.nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM
self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True))
self.apply_func = self.apply_on_retinanet
self.relu = torch.nn.ReLU(inplace=False)
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
self.reset_parameters()
self.feature_dims = [cfg.FPN.DIM]
......@@ -69,7 +70,7 @@ class FPN(torch.nn.Module):
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
def apply_on_retinanet(self, features):
def apply_on_generic(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
......
......@@ -37,7 +37,7 @@ def nms(detections, thresh, force_cpu=False):
if detections.shape[0] == 0:
return []
if detections.shape[1] == 6:
return rotated_boxes.nms(detections, thresh)
return rotated_boxes.cpu_nms(detections, thresh)
if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else:
......
......@@ -17,7 +17,6 @@ import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.ops import functional as F
from lib.utils.blob import blob_to_tensor
class Bootstrap(torch.nn.Module):
......@@ -25,7 +24,7 @@ class Bootstrap(torch.nn.Module):
def __init__(self):
super(Bootstrap, self).__init__()
self.dtype = cfg.MODEL.DATA_TYPE.lower()
self.dtype = cfg.MODEL.PRECISION.lower()
self.mean_values = cfg.PIXEL_MEANS
self.dummy_buffer = torch.ones(1)
......
......@@ -19,7 +19,7 @@ import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
......@@ -145,8 +145,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
......@@ -17,44 +17,14 @@ import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.nms import nms_wrapper
from lib.utils import framework
from lib.utils import time_util
from lib.utils.blob import im_list_to_blob
from lib.utils.graph import FrozenGraph
from lib.utils.image import scale_image
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale
], dtype=np.float32)
# Do Forward
if not hasattr(detector, 'frozen_graph'):
inputs = {
'data': torch.from_numpy(blobs['data']),
'ims_info': torch.from_numpy(blobs['ims_info']),
}
with torch.no_grad():
with torch.jit.Recorder(retain_ops=True):
outputs = detector.forward(inputs)
detector.frozen_graph = FrozenGraph(
{'data': inputs['data'],
'ims_info': inputs['ims_info']},
{'detections': outputs['detections']},
)
outputs = detector.frozen_graph(**blobs)
return outputs['detections'][:, 1:]
def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales."""
ims, ims_scale = scale_image(raw_images[0])
......@@ -81,11 +51,12 @@ def ims_detect(detector, raw_images):
with torch.no_grad():
with torch.jit.Recorder(retain_ops=True):
outputs = detector.forward(inputs)
detector.frozen_graph = FrozenGraph(
{'data': inputs['data'],
'ims_info': inputs['ims_info']},
{'detections': outputs['detections']},
)
detector.frozen_graph = \
framework.FrozenGraph(
{'data': inputs['data'],
'ims_info': inputs['ims_info']},
{'detections': outputs['detections']},
)
outputs = detector.frozen_graph(**blobs)
# Unpack results
......@@ -111,24 +82,21 @@ def test_net(detector, server):
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
_t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue
if batch_idx + item_idx >= num_images:
continue
image_id, raw_image = server.get_image()
image_ids.append(image_id)
raw_images.append(raw_image)
# Run detecting on specific scales
_t['im_detect'].tic()
if cfg.TEST.IMS_PER_BATCH > 1:
with _t['im_detect'].tic_and_toc():
results = ims_detect(detector, raw_images)
else:
results = [im_detect(detector, raw_images[0])]
_t['im_detect'].toc()
# Post-Processing
_t['misc'].tic()
......@@ -139,22 +107,22 @@ def test_net(detector, server):
detections = np.array(detections)
for j in range(1, num_classes):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4]
cls_boxes = detections[cls_indices, :4]
cls_scores = detections[cls_indices, 4]
cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
keep = nms_wrapper.soft_nms(
cls_detections,
cfg.TEST.NMS,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(
keep = nms_wrapper.nms(
cls_detections,
cfg.TEST.NMS,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from lib.core.config import cfg
class _LRScheduler(object):
def __init__(
self,
lr_max,
lr_min=0.,
warmup_steps=0,
warmup_factor=0.,
):
self._step_count = 0
self._lr_max, self._lr_min = lr_max, lr_min
self._warmup_steps = warmup_steps
self._warmup_factor = warmup_factor
self._last_lr = self._lr_max
self._last_steps = self._warmup_steps
def step(self):
self._step_count += 1
def get_lr(self):
if self._step_count < self._warmup_steps:
alpha = (self._step_count + 1.) / self._warmup_steps
decay_factor = self._warmup_factor * (1 - alpha) + alpha
self._last_lr = self._lr_max * decay_factor
return self._last_lr
return self.schedule_impl()
def schedule_impl(self):
raise NotImplementedError
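# Worked example of the warmup branch above (illustrative helper, not part
# of this module): with warmup_factor=0, decay_factor reduces to
# alpha = (step + 1) / warmup_steps, so the LR ramps linearly to lr_max.
def _warmup_demo(lr_max=0.02, warmup_steps=4):
    # yields 0.005, 0.01, 0.015, 0.02 for the defaults
    return [lr_max * (s + 1.) / warmup_steps for s in range(warmup_steps)]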
class StepLR(_LRScheduler):
def __init__(
self,
lr_max,
decay_step,
decay_gamma,
warmup_steps=0,
warmup_factor=0.,
):
super(StepLR, self).__init__(
lr_max=lr_max,
warmup_steps=warmup_steps,
warmup_factor=warmup_factor,
)
self._decay_step = decay_step
self._decay_gamma = decay_gamma
def schedule_impl(self):
step_count = self._step_count - self._last_steps
if step_count % self._decay_step == 0:
decay_factor = step_count // self._decay_step
self._last_lr = self._lr_max * (
self._decay_gamma ** decay_factor)
return self._last_lr
class MultiStepLR(_LRScheduler):
def __init__(
self,
lr_max,
decay_steps,
decay_gamma,
warmup_steps=0,
warmup_factor=0.,
):
super(MultiStepLR, self).__init__(
lr_max=lr_max,
warmup_steps=warmup_steps,
warmup_factor=warmup_factor,
)
self._decay_steps = decay_steps
self._decay_gamma = decay_gamma
self._stage_count, self._num_stages = 0, len(self._decay_steps)
def schedule_impl(self):
if self._stage_count < self._num_stages:
k = self._decay_steps[self._stage_count]
while self._step_count >= k:
self._stage_count += 1
if self._stage_count >= self._num_stages:
break
k = self._decay_steps[self._stage_count]
self._last_lr = self._lr_max * (
self._decay_gamma ** self._stage_count)
return self._last_lr
class LinearLR(_LRScheduler):
def __init__(
self,
lr_max,
decay_step,
max_steps,
warmup_steps=0,
warmup_factor=0.,
):
super(LinearLR, self).__init__(
lr_max=lr_max,
lr_min=0.,
warmup_steps=warmup_steps,
warmup_factor=warmup_factor,
)
self._decay_step = decay_step
self._max_steps = max_steps - warmup_steps
def schedule_impl(self):
step_count = self._step_count - self._last_steps
if step_count % self._decay_step == 0:
decay_factor = 1. - float(step_count) / self._max_steps
self._last_lr = self._lr_max * decay_factor
return self._last_lr
class CosineLR(_LRScheduler):
def __init__(
self,
lr_max,
lr_min,
decay_step,
max_steps,
warmup_steps=0,
warmup_factor=0.,
):
super(CosineLR, self).__init__(
lr_max=lr_max,
lr_min=lr_min,
warmup_steps=warmup_steps,
warmup_factor=warmup_factor,
)
self._decay_step = decay_step
self._max_steps = max_steps - warmup_steps
def schedule_impl(self):
step_count = self._step_count - self._last_steps
if step_count % self._decay_step == 0:
decay_factor = 0.5 * (1. + math.cos(
math.pi * step_count / self._max_steps))
self._last_lr = self._lr_min + (
self._lr_max - self._lr_min
) * decay_factor
return self._last_lr
def get_scheduler():
lr_policy = cfg.SOLVER.LR_POLICY
if lr_policy == 'step':
return StepLR(
lr_max=cfg.SOLVER.BASE_LR,
decay_step=cfg.SOLVER.DECAY_STEP,
decay_gamma=cfg.SOLVER.DECAY_GAMMA,
warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
)
elif lr_policy == 'steps_with_decay':
return MultiStepLR(
lr_max=cfg.SOLVER.BASE_LR,
decay_steps=cfg.SOLVER.DECAY_STEPS,
decay_gamma=cfg.SOLVER.DECAY_GAMMA,
warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
)
elif lr_policy == 'cosine_decay':
return CosineLR(
lr_max=cfg.SOLVER.BASE_LR,
lr_min=0.,
decay_step=cfg.SOLVER.DECAY_STEP,
max_steps=cfg.SOLVER.MAX_STEPS,
warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
)
else:
raise ValueError('Unknown lr policy: ' + lr_policy)
if __name__ == '__main__':
def extract_label(scheduler):
class_name = scheduler.__class__.__name__
label = class_name + '('
if class_name == 'StepLR':
label += 'α=' + str(scheduler._decay_step) + ', '
label += 'γ=' + str(scheduler._decay_gamma)
elif class_name == 'MultiStepLR':
label += 'α=' + str(scheduler._decay_steps) + ', '
label += 'γ=' + str(scheduler._decay_gamma)
elif class_name == 'CosineLR':
label += 'α=' + str(scheduler._decay_step)
label += ')'
return label
vis = True
max_steps = 240
shared_args = {
'lr_max': 0.4,
'warmup_steps': 5,
'warmup_factor': 0.,
}
schedulers = [
StepLR(decay_step=1, decay_gamma=0.97, **shared_args),
MultiStepLR(decay_steps=[60, 120, 180], decay_gamma=0.1, **shared_args),
CosineLR(lr_min=0., decay_step=1, max_steps=max_steps, **shared_args),
LinearLR(decay_step=1, max_steps=max_steps, **shared_args),
]
for i in range(max_steps):
info = 'Step = %d\n' % i
for scheduler in schedulers:
if i == 0:
    scheduler.lr_seq = []
lr = scheduler.get_lr()
info += ' * {}: {}\n'.format(extract_label(scheduler), lr)
scheduler.lr_seq.append(lr)
scheduler.step()
if not vis:
print(info)
if vis:
import matplotlib.pyplot as plt
plt.figure(1)
plt.title('Visualization of different LR Schedulers')
plt.xlabel('Step')
plt.ylabel('Learning Rate')
line = '--'
colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k']
for i, scheduler in enumerate(schedulers):
plt.plot(
range(max_steps),
scheduler.lr_seq,
colors[i] + line,
linewidth=1.,
label=extract_label(scheduler),
)
plt.legend()
plt.show()
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.detector import Detector
from lib.solver import lr_scheduler
from lib.utils import framework
from lib.utils import time_util
class SGDSolver(object):
def __init__(self):
# Define the generic detector
self.detector = Detector()
# Define the optimizer and its arguments
self.optimizer = torch.optim.SGD(
framework.get_param_groups(self.detector),
lr=cfg.SOLVER.BASE_LR,
momentum=cfg.SOLVER.MOMENTUM,
weight_decay=cfg.SOLVER.WEIGHT_DECAY,
clip_gradient=float(cfg.SOLVER.CLIP_NORM),
scale_gradient=1. / cfg.SOLVER.LOSS_SCALING,
)
self.lr_scheduler = lr_scheduler.get_scheduler()
def one_step(self):
def add_loss(x, y):
return y if x is None else x + y
stats = {
'iter': self.iter,
'loss': {'total': 0.},
'time': time_util.Timer(),
}
with stats['time'].tic_and_toc():
# Forward pass
outputs = self.detector()
# Backward pass
total_loss = None
loss_scaling = cfg.SOLVER.LOSS_SCALING
for k, v in outputs.items():
if 'loss' in k:
if k not in stats['loss']:
stats['loss'][k] = 0.
total_loss = add_loss(total_loss, v)
stats['loss'][k] += float(v) * loss_scaling
if loss_scaling != 1.:
total_loss *= loss_scaling
stats['loss']['total'] += float(total_loss)
total_loss.backward()
# Apply Update
self.base_lr = self.lr_scheduler.get_lr()
self.optimizer.step()
self.lr_scheduler.step()
# Misc stats
stats['lr'] = self.base_lr
stats['time'] = stats['time'].total_time
return stats
@property
def base_lr(self):
return self.optimizer.param_groups[0]['lr']
@base_lr.setter
def base_lr(self, value):
for group in self.optimizer.param_groups:
group['lr'] = value
@property
def iter(self):
return self.lr_scheduler._step_count
@iter.setter
def iter(self, value):
self.lr_scheduler._step_count = value
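# A hedged sketch of a loop driving SGDSolver (log_every is hypothetical,
# not from this change): each one_step() call runs forward, backward and
# the parameter update, returning the stats logged below.
if __name__ == '__main__':
    solver, log_every = SGDSolver(), 20
    while solver.iter < cfg.SOLVER.MAX_STEPS:
        stats = solver.one_step()
        if stats['iter'] % log_every == 0:
            print('step {}, lr {:.5f}, total loss {:.4f}'.format(
                stats['iter'], stats['lr'], stats['loss']['total']))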
......@@ -83,7 +83,7 @@ class DataTransformer(multiprocessing.Process):
]
else:
roi_dict['boxes'][object_idx, :] = \
rotated_boxes.canonicalize(
rotated_boxes.vertices2box(
[obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
......@@ -108,7 +108,7 @@ class DataTransformer(multiprocessing.Process):
def get(self, example):
img = np.frombuffer(example['content'], np.uint8)
img = cv2.imdecode(img, -1)
img = cv2.imdecode(img, 1)
# Flip
flip = False
......
......@@ -17,7 +17,7 @@ import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
class HardMiningLayer(torch.nn.Module):
......@@ -63,4 +63,4 @@ class HardMiningLayer(torch.nn.Module):
labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices
# Feed labels to compute cls loss
return {'labels': blob_to_tensor(labels_wide)}
return {'labels': array2tensor(labels_wide)}
......@@ -17,7 +17,7 @@ import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
......@@ -121,7 +121,7 @@ class MultiBoxTargetLayer(torch.nn.Module):
bbox_outside_weights_wide[ix][ex_inds] = bbox_reg_weight
return {
'bbox_targets': blob_to_tensor(bbox_targets_wide),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights_wide),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights_wide),
'bbox_targets': array2tensor(bbox_targets_wide),
'bbox_inside_weights': array2tensor(bbox_inside_weights_wide),
'bbox_outside_weights': array2tensor(bbox_outside_weights_wide),
}
......@@ -18,12 +18,11 @@ import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.nms import nms_wrapper
from lib.utils import framework
from lib.utils import time_util
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_boxes
from lib.utils.timer import Timer
from lib.utils.graph import FrozenGraph
from lib.utils.vis import vis_one_image
......@@ -49,12 +48,13 @@ def ims_detect(detector, ims):
with torch.no_grad():
with torch.jit.Recorder(retain_ops=True):
outputs = detector.forward(inputs={'data': image})
detector.frozen_graph = FrozenGraph(
{'data': image},
{'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']},
{'prior_boxes': outputs['prior_boxes']},
)
detector.frozen_graph = \
framework.FrozenGraph(
{'data': image},
{'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']},
{'prior_boxes': outputs['prior_boxes']},
)
outputs = detector.frozen_graph(data=data)
# Decode results
......@@ -81,21 +81,21 @@ def test_net(detector, server):
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
_t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue
if batch_idx + item_idx >= num_images:
continue
image_id, raw_image = server.get_image()
image_ids.append(image_id)
raw_images.append(raw_image)
_t['im_detect'].tic()
batch_scores, batch_boxes = ims_detect(detector, raw_images)
_t['im_detect'].toc()
with _t['im_detect'].tic_and_toc():
batch_scores, batch_boxes = ims_detect(detector, raw_images)
_t['misc'].tic()
for item_idx in range(len(batch_scores)):
......@@ -114,16 +114,16 @@ def test_net(detector, server):
(cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
keep = nms_wrapper.soft_nms(
cls_detections,
cfg.TEST.NMS,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(
keep = nms_wrapper.nms(
cls_detections,
cfg.TEST.NMS,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
......
......@@ -47,18 +47,16 @@ class Distort(object):
def apply(self, img, boxes=None):
img = PIL.Image.fromarray(img)
if npr.uniform() < self._brightness_prob:
delta = npr.uniform(-0.3, 0.3) + 1.
img = PIL.ImageEnhance.Brightness(img)
img = img.enhance(delta)
if npr.uniform() < self._contrast_prob:
delta = npr.uniform(-0.3, 0.3) + 1.
img = PIL.ImageEnhance.Contrast(img)
img = img.enhance(delta)
if npr.uniform() < self._saturation_prob:
delta = npr.uniform(-0.3, 0.3) + 1.
img = PIL.ImageEnhance.Color(img)
img = img.enhance(delta)
transforms = [
(PIL.ImageEnhance.Brightness, self._brightness_prob),
(PIL.ImageEnhance.Contrast, self._contrast_prob),
(PIL.ImageEnhance.Color, self._saturation_prob),
]
npr.shuffle(transforms)
for transform_fn, prob in transforms:
if npr.uniform() < prob:
img = transform_fn(img)
img = img.enhance(1. + npr.uniform(-.4, .4))
return np.array(img), boxes
......
......@@ -21,7 +21,8 @@ import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.image import resize_image, distort_image
from lib.utils.image import distort_image
from lib.utils.image import resize_image
def im_list_to_blob(ims):
......@@ -60,17 +61,17 @@ def mask_list_to_blob(masks):
return blob
def prep_im_for_blob(im, target_size, max_size):
def prep_im_for_blob(img, target_size, max_size):
"""Scale an image for use in a blob."""
im_shape, jitter = im.shape, 1.
im_shape, jitter = img.shape, 1.
if cfg.TRAIN.COLOR_JITTERING:
im = distort_image(im)
if cfg.TRAIN.USE_COLOR_JITTER:
img = distort_image(img)
if max_size > 0:
# Scale image along the shortest side
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
im_size_min = np.min(im_shape[:2])
im_size_max = np.max(im_shape[:2])
im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE
......@@ -78,31 +79,31 @@ def prep_im_for_blob(im, target_size, max_size):
im_scale = float(max_size) / float(im_size_max)
else:
# Scale image along the longest side
im_size_max = np.max(im_shape[0:2])
im_size_max = np.max(im_shape[:2])
im_scale = float(target_size) / float(im_size_max)
if cfg.TRAIN.SCALE_JITTERING:
r = cfg.TRAIN.SCALE_RANGE
if cfg.TRAIN.USE_SCALE_JITTER:
r = cfg.TRAIN.SCALE_JITTER_RANGE
jitter = r[0] + np.random.rand() * (r[1] - r[0])
im_scale *= jitter
return resize_image(im, im_scale, im_scale), im_scale, jitter
return resize_image(img, im_scale, im_scale), im_scale, jitter
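# Worked example of the scaling rule above (jitter disabled; illustrative
# helper): the short side is scaled to target_size unless that would push
# the long side past max_size, in which case the scale is clamped down.
def _scale_demo(shapes=((480, 640), (480, 1024)),
                target_size=600, max_size=1000):
    scales = []
    for h, w in shapes:
        im_scale = float(target_size) / min(h, w)
        if max(h, w) * im_scale > max_size:
            im_scale = float(max_size) / max(h, w)
        scales.append(round(im_scale, 4))
    return scales  # -> [1.25, 0.9766]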
def blob_to_tensor(blob, enforce_cpu=False):
if isinstance(blob, np.ndarray):
def array2tensor(array, enforce_cpu=False):
if isinstance(array, np.ndarray):
# Zero-Copy from numpy
cpu_tensor = torch.from_numpy(blob)
cpu_tensor = torch.from_numpy(array)
else:
cpu_tensor = blob
cpu_tensor = array
return cpu_tensor if enforce_cpu else \
cpu_tensor.cuda(cfg.GPU_ID)
def tensor_to_blob(blob, copy=False):
if isinstance(blob, torch.Tensor):
def tensor2array(tensor, copy=False):
if isinstance(tensor, torch.Tensor):
# Zero-copy view back to numpy
array = blob.numpy(True)
array = tensor.numpy(True)
else:
array = blob
array = tensor
return array.copy() if copy else array
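# A small usage sketch for the renamed helpers above: array2tensor wraps a
# numpy array zero-copy (optionally moving it to the configured GPU), and
# tensor2array exposes the tensor memory back as an array.
def _roundtrip_demo():
    boxes = np.zeros((4, 5), dtype=np.float32)
    t = array2tensor(boxes, enforce_cpu=True)  # stay on CPU for the demo
    return tensor2array(t, copy=True)  # copy=True detaches the memory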
......@@ -16,8 +16,100 @@ from __future__ import print_function
import collections
import dragon
import dragon.vm.torch as torch
from dragon.core.framework import tensor_util
from dragon.vm.torch.jit.recorder import get_default_recorder
from dragon.core.util import six
def get_param_groups(module, bias_lr=1., bias_decay=0.):
"""Separate weight and bias into parameters groups.
Parameters
----------
module : dragon.vm.torch.nn.Module
The module to collect parameters.
bias_lr : float, optional, default=1.
The lr multiplier of bias.
bias_decay : float, optional, default=0.
The decay multiplier of bias.
Returns
-------
Sequence[ParamGroup]
The parameter groups.
"""
param_groups = [
{
'params': [],
'lr_mult': 1.,
'decay_mult': 1.,
},
{
'params': [],
'lr_mult': bias_lr,
'decay_mult': bias_decay,
}
]
for name, param in module.named_parameters():
gi = 1 if 'bias' in name else 0
param_groups[gi]['params'].append(param)
if len(param_groups[1]['params']) == 0:
param_groups.pop() # Remove empty group
return param_groups
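# A usage sketch (illustrative values and helper name, mirroring SGDSolver
# in this change): biases get their own group so they can take a different
# LR multiplier and skip weight decay.
def make_sgd(module, base_lr=0.02, momentum=0.9, weight_decay=0.0001):
    return torch.optim.SGD(
        get_param_groups(module, bias_lr=2., bias_decay=0.),
        lr=base_lr, momentum=momentum, weight_decay=weight_decay)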
def get_workspace():
"""Return the current default workspace.
Returns
-------
dragon.Workspace
The default workspace.
"""
return dragon.workspace.get_default()
def new_workspace(merge_default=True):
"""Create a new workspace.
Parameters
----------
merge_default : bool, optional, default=True
**True** to merge tensors from default workspace.
Returns
-------
dragon.Workspace
The new workspace.
"""
workspace = dragon.Workspace()
if merge_default:
workspace.merge_from(get_workspace())
return workspace
def reset_workspace(workspace=None, merge_default=True):
"""Reset a workspace and return a new one.
Parameters
----------
workspace : dragon.Workspace, optional
The workspace to reset.
merge_default : bool, optional, default=True
**True** to merge tensors from default workspace.
Returns
-------
dragon.Workspace
The new workspace.
"""
if workspace is not None:
workspace.Clear() # Block the GIL
return new_workspace(merge_default)
class FrozenGraph(object):
......@@ -41,9 +133,8 @@ class FrozenGraph(object):
self._inputs = canonicalize(inputs)
self._outputs = canonicalize(outputs)
self._constants = canonicalize(constants)
self._graph = dragon.Workspace() \
.merge_from(dragon.workspace.get_default())
self._tape = get_default_recorder()
self._graph = new_workspace()
self._tape = torch.jit.get_default_recorder()
def forward(self, **kwargs):
# Assign inputs
......@@ -70,3 +161,7 @@ class FrozenGraph(object):
def __call__(self, **kwargs):
with self._graph.as_default():
return self.forward(**kwargs)
# Aliases
pickle = six.moves.pickle
......@@ -21,9 +21,50 @@ import PIL.ImageEnhance
from lib.core.config import cfg
def resize_image(im, fx, fy):
def distort_image(img):
img = PIL.Image.fromarray(img)
transforms = [
PIL.ImageEnhance.Brightness,
PIL.ImageEnhance.Contrast,
PIL.ImageEnhance.Color,
]
np.random.shuffle(transforms)
for transform in transforms:
if np.random.uniform() < .5:
img = transform(img)
img = img.enhance(1. + np.random.uniform(-.4, .4))
return np.array(img)
def get_image_with_target_size(target_size, img):
im_shape = list(img.shape)
height_diff = target_size[0] - im_shape[0]
width_diff = target_size[1] - im_shape[1]
ofs_crop_width = np.random.randint(max(-width_diff, 0) + 1)
ofs_pad_width = np.random.randint(max(width_diff, 0) + 1)
ofs_crop_height = np.random.randint(max(-height_diff, 0) + 1)
ofs_pad_height = np.random.randint(max(height_diff, 0) + 1)
im_shape[:2] = target_size
new_img = np.empty(im_shape, dtype=img.dtype)
new_img[:] = cfg.PIXEL_MEANS
new_img[ofs_pad_height:ofs_pad_height + img.shape[0],
ofs_pad_width:ofs_pad_width + img.shape[1]] = \
img[ofs_crop_height:ofs_crop_height + target_size[0],
ofs_crop_width:ofs_crop_width + target_size[1]]
return new_img, (
ofs_pad_width - ofs_crop_width,
ofs_pad_height - ofs_crop_height,
target_size,
)
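# Worked example of the offset logic above (illustrative helper): for a
# 500x800 image and a (600, 600) target, height_diff = +100 (random pad)
# and width_diff = -200 (random crop); the returned (dx, dy) pair lets
# ground-truth boxes be shifted to follow the image.
def _offset_demo(img_shape=(500, 800), target_size=(600, 600)):
    height_diff = target_size[0] - img_shape[0]  # +100 -> pad rows
    width_diff = target_size[1] - img_shape[1]  # -200 -> crop cols
    ofs_pad_height = np.random.randint(max(height_diff, 0) + 1)  # [0, 100]
    ofs_crop_width = np.random.randint(max(-width_diff, 0) + 1)  # [0, 200]
    return ofs_pad_height, ofs_crop_width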
def resize_image(img, fx, fy):
return cv2.resize(
im,
img,
dsize=None,
fx=fx, fy=fy,
interpolation=cv2.INTER_LINEAR,
......@@ -36,29 +77,12 @@ def resize_mask(mask, size):
return np.array(mask.resize(size, PIL.Image.NEAREST))
def distort_image(im):
im = PIL.Image.fromarray(im)
if np.random.uniform() < 0.5:
delta_brightness = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Brightness(im)
im = im.enhance(delta_brightness)
if np.random.uniform() < 0.5:
delta_contrast = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Contrast(im)
im = im.enhance(delta_contrast)
if np.random.uniform() < 0.5:
delta_saturation = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Color(im)
im = im.enhance(delta_saturation)
return np.array(im)
def scale_image(im):
def scale_image(img):
processed_ims, ims_scales = [], []
if cfg.TEST.MAX_SIZE > 0:
im_size_min = np.min(im.shape[:2])
im_size_max = np.max(im.shape[:2])
im_size_min = np.min(img.shape[:2])
im_size_max = np.max(img.shape[:2])
for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE
......@@ -66,7 +90,7 @@ def scale_image(im):
im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
processed_ims.append(
cv2.resize(
im,
img,
dsize=None,
fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR,
......@@ -74,12 +98,12 @@ def scale_image(im):
ims_scales.append(im_scale)
else:
# Scale image along the longest side
im_size_max = np.max(im.shape[0:2])
im_size_max = np.max(img.shape[:2])
for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_max)
processed_ims.append(
cv2.resize(
im,
img,
dsize=None,
fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR,
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/timer.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import time
class Timer(object):
"""A simple timer."""
def __init__(self):
self.total_time = 0.
self.calls = 0
self.start_time = 0.
self.diff = 0.
self.average_time = 0.
def tic(self):
# Using time.time instead of time.clock because time.clock
# does not normalize for multi-threading
self.start_time = time.time()
def toc(self, average=True):
self.diff = time.time() - self.start_time
self.total_time += self.diff
self.calls += 1
self.average_time = self.total_time / self.calls
if average:
return self.average_time
else:
return self.diff
@contextlib.contextmanager
def tic_and_toc(self):
try:
yield self.tic()
finally:
self.toc()
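# Usage sketch for the context-manager form above (illustrative helper):
# time a block without explicit tic()/toc() bookkeeping.
def _timer_demo():
    timer = Timer()
    with timer.tic_and_toc():
        time.sleep(0.01)  # stand-in workload
    return timer.total_time, timer.calls  # ~0.01s, 1 call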
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/timer.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import datetime
import time
class Timer(object):
"""A simple timer."""
def __init__(self):
self.total_time = 0.
self.calls = 0
self.start_time = 0.
self.diff = 0.
self.average_time = 0.
@contextlib.contextmanager
def tic_and_toc(self):
try:
yield self.tic()
finally:
self.toc()
def tic(self):
# Using time.time instead of time.clock because time.clock
# does not normalize for multi-threading
self.start_time = time.time()
def toc(self, average=True):
self.diff = time.time() - self.start_time
self.total_time += self.diff
self.calls += 1
self.average_time = self.total_time / self.calls
if average:
return self.average_time
else:
return self.diff
def get_progress_info(timer, curr_step, max_steps):
"""Return a info of current progress.
Parameters
----------
timer : Timer
The timer to get progress.
curr_step : int
The current step.
max_steps : int
The total number of steps.
Returns
-------
str
The progress info.
"""
average_time = timer.average_time
eta_seconds = average_time * (max_steps - curr_step)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = (curr_step + 1.) / max_steps
return '< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >' \
.format(progress, timer.average_time, eta)
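# Worked example of the line produced above (illustrative helper): at an
# average of 0.5s per step, at step 449 of 900 the remaining 451 steps
# give an ETA of about 225s.
def _progress_demo():
    timer = Timer()
    timer.total_time, timer.calls = 225., 450
    timer.average_time = timer.total_time / timer.calls  # 0.5s / step
    # -> '< PROGRESS: 50.00% | SPEED: 0.500s / iter | ETA: 0:03:45 >'
    return get_progress_info(timer, 449, 900)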
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from os import path as osp
from maker import make_record
if __name__ == '__main__':
voc_root = '/data/VOC'
make_record(
record_file=osp.join(voc_root, 'voc_0712_trainval'),
images_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/JPEGImages')],
annotations_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/Annotations')],
imagesets_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
splits=['trainval', 'trainval']
)
make_record(
record_file=osp.join(voc_root, 'voc_2007_test'),
images_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
annotations_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
imagesets_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits=['test']
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import cv2
import dragon
import numpy as np
import xml.etree.ElementTree as ET
def make_example(image_file, xml_file):
tree = ET.parse(xml_file)
filename = os.path.split(xml_file)[-1]
objs = tree.findall('object')
example = {'id': filename.split('.')[0], 'object': []}
with open(image_file, 'rb') as f:
img_bytes = bytes(f.read())
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 1)
example['height'], example['width'], example['depth'] = img.shape
example['content'] = img_bytes
for ix, obj in enumerate(objs):
bbox = obj.find('bndbox')
is_diff = 0
if obj.find('difficult') is not None:
is_diff = int(obj.find('difficult').text) == 1
example['object'].append({
'name': obj.find('name').text.strip(),
'x1': float(bbox.find('x1').text),
'y1': float(bbox.find('y1').text),
'x2': float(bbox.find('x2').text),
'y2': float(bbox.find('y2').text),
'x3': float(bbox.find('x3').text),
'y3': float(bbox.find('y3').text),
'x4': float(bbox.find('x4').text),
'y4': float(bbox.find('y4').text),
'difficult': is_diff,
})
return example
def make_record(
record_file,
images_path,
annotations_path,
imagesets_path,
splits
):
if os.path.exists(record_file):
raise ValueError('The record file already exists.')
os.makedirs(record_file)
if not isinstance(images_path, list):
images_path = [images_path]
if not isinstance(annotations_path, list):
annotations_path = [annotations_path]
if not isinstance(imagesets_path, list):
imagesets_path = [imagesets_path]
assert len(splits) == len(imagesets_path)
assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path)
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
writer = dragon.io.SeetaRecordWriter(
path=record_file,
protocol={
'id': 'string',
'content': 'bytes',
'height': 'int64',
'width': 'int64',
'depth': 'int64',
'object': [{
'name': 'string',
'x1': 'float64',
'y1': 'float64',
'x2': 'float64',
'y2': 'float64',
'x3': 'float64',
'y3': 'float64',
'x4': 'float64',
'y4': 'float64',
'difficult': 'int64',
}]
}
)
count, total_line = 0, 0
start_time = time.time()
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
count, total_line, now_time - start_time))
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
writer.write(make_example(image_file, xml_file))
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
writer.close()
end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from os import path as osp
from maker import make_record
if __name__ == '__main__':
voc_root = '/data/VOC'
make_record(
record_file=osp.join(voc_root, 'voc_0712_trainval'),
images_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/JPEGImages')],
annotations_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/Annotations')],
imagesets_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
splits=['trainval', 'trainval']
)
make_record(
record_file=osp.join(voc_root, 'voc_2007_test'),
images_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
annotations_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
imagesets_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits=['test']
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import cv2
import dragon
import numpy as np
import xml.etree.ElementTree as ET
def make_example(image_file, xml_file):
tree = ET.parse(xml_file)
filename = os.path.split(xml_file)[-1]
objs = tree.findall('object')
example = {'id': filename.split('.')[0], 'object': []}
with open(image_file, 'rb') as f:
img_bytes = bytes(f.read())
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 1)
example['height'], example['width'], example['depth'] = img.shape
example['content'] = img_bytes
for ix, obj in enumerate(objs):
bbox = obj.find('bndbox')
is_diff = 0
if obj.find('difficult') is not None:
is_diff = int(obj.find('difficult').text) == 1
example['object'].append({
'name': obj.find('name').text.strip(),
'xmin': float(bbox.find('xmin').text),
'ymin': float(bbox.find('ymin').text),
'xmax': float(bbox.find('xmax').text),
'ymax': float(bbox.find('ymax').text),
'difficult': is_diff,
})
return example
def make_record(
record_file,
images_path,
annotations_path,
imagesets_path,
splits
):
if os.path.exists(record_file):
raise ValueError('The record file already exists.')
os.makedirs(record_file)
if not isinstance(images_path, list):
images_path = [images_path]
if not isinstance(annotations_path, list):
annotations_path = [annotations_path]
if not isinstance(imagesets_path, list):
imagesets_path = [imagesets_path]
assert len(splits) == len(imagesets_path)
assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path)
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
writer = dragon.io.SeetaRecordWriter(
path=record_file,
protocol={
'id': 'string',
'content': 'bytes',
'height': 'int64',
'width': 'int64',
'depth': 'int64',
'object': [{
'name': 'string',
'xmin': 'float64',
'ymin': 'float64',
'xmax': 'float64',
'ymax': 'float64',
'difficult': 'int64',
}]
}
)
count, total_line = 0, 0
start_time = time.time()
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
count, total_line, now_time - start_time))
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
writer.write(make_example(image_file, xml_file))
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
writer.close()
end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time))
......@@ -74,7 +74,7 @@ if __name__ == '__main__':
detector.optimize_for_inference()
# Mixed precision inference?
if cfg.MODEL.DATA_TYPE.lower() == 'float16':
if cfg.MODEL.PRECISION.lower() == 'float16':
detector.half() # Powerful FP16 Support
data = torch.zeros(*args.input_shape).byte()
......
......@@ -37,8 +37,14 @@ def parse_args():
parser.add_argument('--exp_dir', dest='exp_dir',
help='experiment dir',
default=None, type=str)
parser.add_argument('--output_dir', dest='output_dir',
help='output dir',
default=None, type=str)
parser.add_argument('--iter', dest='iter', help='global step',
default=0, type=int)
default=None, type=int)
parser.add_argument('--dump', dest='dump',
help='dump the result back to record?',
action='store_true')
parser.add_argument('--wait', dest='wait',
help='wait the checkpoint?',
action='store_true')
......@@ -75,19 +81,19 @@ if __name__ == '__main__':
# Inspect the database
database = get_imdb(cfg.TEST.DATABASE)
cfg.TEST.PROTOCOL = 'null' if args.dump else cfg.TEST.PROTOCOL
logger.info('Database({}): {} images will be used for testing.'
            .format(cfg.TEST.DATABASE, database.num_images))
# Ready to test the network
logger.info('Results will be saved to `{:s}`'
.format(coordinator.results_dir(checkpoint)))
output_dir = coordinator.results_dir(checkpoint, args.output_dir)
logger.info('Results will be saved to `{:s}`'.format(output_dir))
detector = Detector().eval().cuda(cfg.GPU_ID)
detector.load_weights(checkpoint)
detector.optimize_for_inference()
# Mixed precision inference?
if cfg.MODEL.DATA_TYPE.lower() == 'float16':
if cfg.MODEL.PRECISION.lower() == 'float16':
detector.half() # Powerful FP16 Support
server = TestServer(coordinator.results_dir(checkpoint))
test_engine.test_net(detector, server)
test_engine.test_net(detector, TestServer(output_dir))