Commit 19c489b6 by Ting PAN

Init repository

## General
# Compiled Object files
*.slo
*.lo
*.o
*.cuo
# Compiled Dynamic libraries
# *.so
*.dylib
# Compiled Static libraries
*.lai
*.la
#*.a
# Compiled python
*.pyc
__pycache__
# Compiled MATLAB
*.mex*
# IPython notebook checkpoints
.ipynb_checkpoints
# Editor temporaries
*.swp
*~
# Sublime Text settings
*.sublime-workspace
*.sublime-project
# Eclipse Project settings
*.*project
.settings
# QtCreator files
*.user
# PyCharm files
.idea
# OSX dir files
.DS_Store
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.1.0 (20190311)
Recommended docker for Dragon:
seetaresearch/dragon:0.3.0.0-rc4-cuda9.1-ubuntu16.04
Changes:
Preview Features:
- Init repository.
Bugs fixed:
- None
## SeetaDet
## What is SeetaDet?
SeetaDet contains many useful object detectors, including the R-CNN series, SSD,
and the recent RetinaNet. We have achieved performance equal to or higher than
the baselines reported in the original papers.
This repository is based on our [Dragon](https://github.com/seetaresearch/Dragon),
while the code follows the style of PyTorch. The torch-style code helps us
simplify the hierarchical pipeline of modern detection.
## Installation
#### 1. Install the required python packages
```bash
pip install cython pyyaml matplotlib
pip install opencv-python Pillow
```
#### 2. Compile the C Extensions
```bash
cd SeetaDet/compile
bash ./make.sh
```
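As a quick sanity check after compilation (a minimal sketch, assuming `make.sh` has copied `install/lib` into the repository root as `lib/`):
```python
# Minimal import check for the compiled extensions.
import numpy as np
from lib.utils.cython_bbox import bbox_overlaps
from lib.nms.cpu_nms import cpu_nms

boxes = np.array([[0., 0., 10., 10.]], dtype=np.float64)
query = np.array([[5., 5., 15., 15.]], dtype=np.float64)
print(bbox_overlaps(boxes, query))  # a single IoU value in a (1, 1) array
```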
## Resources
#### Pre-trained ImageNet models
| Model | Usage |
| :------: | :------: |
| [VGG16.SSD](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/VGG16.SSD.pth)| SSD |
| [VGG16.RCNN](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/VGG16.RCNN.pth)| R-CNN |
| [R-50.Affine](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-50.Affine.pth)| R-CNN, RetinaNet |
| [R-101.Affine](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-101.Affine.pth)| R-CNN, RetinaNet |
| [AirNet.SSD](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/AirNet.SSD.pth)| SSD |
## References
[1] [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497). Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. NIPS, 2015.
[2] [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385). Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. CVPR, 2016.
[3] [SSD: Single Shot MultiBox Detector](https://arxiv.org/abs/1512.02325). Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. ECCV, 2016.
[4] [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144). Tsung-Yi Lin, Piotr Dollár, Ross Girshick, Kaiming He, Bharath Hariharan, and Serge Belongie. CVPR, 2017.
[5] [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002). Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He, and Piotr Dollár. ICCV, 2017.
[6] [Mask R-CNN](https://arxiv.org/abs/1703.06870). Kaiming He, Georgia Gkioxari, Piotr Dollár and Ross Girshick. ICCV, 2017.
[7] [Detectron](https://github.com/facebookresearch/Detectron). Ross Girshick, Ilija Radosavovic, Georgia Gkioxari, Piotr Dollar and Kaiming He. 2018.
# - Find the NumPy libraries
# This module finds if NumPy is installed, and sets the following variables
# indicating where it is.
#
# TODO: Update to provide the libraries and paths for linking npymath lib.
#
# NUMPY_FOUND - was NumPy found
# NUMPY_VERSION - the version of NumPy found as a string
# NUMPY_VERSION_MAJOR - the major version number of NumPy
# NUMPY_VERSION_MINOR - the minor version number of NumPy
# NUMPY_VERSION_PATCH - the patch version number of NumPy
# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601
# NUMPY_INCLUDE_DIR - path to the NumPy include files
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
if(PYTHONINTERP_FOUND)
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import numpy as n; print(n.__version__); print(n.get_include());"
RESULT_VARIABLE __result
OUTPUT_VARIABLE __output
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(__result MATCHES 0)
string(REGEX REPLACE ";" "\\\\;" __values ${__output})
string(REGEX REPLACE "\r?\n" ";" __values ${__values})
list(GET __values 0 NUMPY_VERSION)
list(GET __values 1 NUMPY_INCLUDE_DIR)
string(REGEX MATCH "^([0-9])+\\.([0-9])+\\.([0-9])+" __ver_check "${NUMPY_VERSION}")
if(NOT "${__ver_check}" STREQUAL "")
set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1})
set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2})
set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3})
math(EXPR NUMPY_VERSION_DECIMAL
"(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR})
else()
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n")
endif()
endif()
else()
message("Cannot find the Python interpreter.")
message(FATAL_ERROR "Did you set PYTHON_EXECUTABLE correctly?")
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION
VERSION_VAR NUMPY_VERSION)
if(NUMPY_FOUND)
message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})")
endif()
# - Find python libraries
# This module finds the libraries corresponding to the Python interpreter
# FindPythonInterp provides.
# This code sets the following variables:
#
# PYTHONLIBS_FOUND - have the Python libs been found
# PYTHON_PREFIX - path to the Python installation
# PYTHON_LIBRARIES - path to the python library
# PYTHON_INCLUDE_DIRS - path to where Python.h is found
# PYTHON_MODULE_EXTENSION - lib extension, e.g. '.so' or '.pyd'
# PYTHON_MODULE_PREFIX - lib name prefix: usually an empty string
# PYTHON_SITE_PACKAGES - path to installation site-packages
# PYTHON_IS_DEBUG - whether the Python interpreter is a debug build
#
# Thanks to talljimbo for the patch adding the 'LDVERSION' config
# variable usage.
#=============================================================================
# Copyright 2001-2009 Kitware, Inc.
# Copyright 2012 Continuum Analytics, Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# * Neither the names of Kitware, Inc., the Insight Software Consortium,
# nor the names of their contributors may be used to endorse or promote
# products derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=============================================================================
# Checking for the extension makes sure that `LibsNew` was found and not just `Libs`.
if(PYTHONLIBS_FOUND AND PYTHON_MODULE_EXTENSION)
return()
endif()
# Use the Python interpreter to find the libs.
if(PythonLibsNew_FIND_REQUIRED)
find_package(PythonInterp ${PythonLibsNew_FIND_VERSION} REQUIRED)
else()
find_package(PythonInterp ${PythonLibsNew_FIND_VERSION})
endif()
if(NOT PYTHONINTERP_FOUND)
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter
# testing whether sys has the gettotalrefcount function is a reliable, cross-platform
# way to detect a CPython debug interpreter.
#
# The library suffix is from the config var LDVERSION sometimes, otherwise
# VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows.
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"from distutils import sysconfig as s;import sys;import struct;
print('.'.join(str(v) for v in sys.version_info));
print(sys.prefix);
print(s.get_python_inc(plat_specific=True));
print(s.get_python_lib(plat_specific=True));
print(s.get_config_var('SO'));
print(hasattr(sys, 'gettotalrefcount')+0);
print(struct.calcsize('@P'));
print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
print(s.get_config_var('LIBDIR') or '');
print(s.get_config_var('MULTIARCH') or '');
"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE _PYTHON_VALUES
ERROR_VARIABLE _PYTHON_ERROR_VALUE)
if(NOT _PYTHON_SUCCESS MATCHES 0)
if(PythonLibsNew_FIND_REQUIRED)
message(FATAL_ERROR
"Python config failure:\n${_PYTHON_ERROR_VALUE}")
endif()
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# Convert the process output into a list
string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST)
list(GET _PYTHON_VALUES 1 PYTHON_PREFIX)
list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR)
list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES)
list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG)
list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
list(GET _PYTHON_VALUES 8 PYTHON_LIBDIR)
list(GET _PYTHON_VALUES 9 PYTHON_MULTIARCH)
# Make sure the Python has the same pointer-size as the chosen compiler
# Skip if CMAKE_SIZEOF_VOID_P is not defined
if(CMAKE_SIZEOF_VOID_P AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}"))
if(PythonLibsNew_FIND_REQUIRED)
math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8")
math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8")
message(FATAL_ERROR
"Python config failure: Python is ${_PYTHON_BITS}-bit, "
"chosen compiler is ${_CMAKE_BITS}-bit")
endif()
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# The built-in FindPython didn't always give the version numbers
string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST ${_PYTHON_VERSION_LIST})
list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR)
list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR)
list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH)
# Make sure all directory separators are '/'
string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDE_DIR})
string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES})
if(CMAKE_HOST_WIN32)
set(PYTHON_LIBRARY
"${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
# when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the
# original python installation. They may be found relative to PYTHON_INCLUDE_DIR.
if(NOT EXISTS "${PYTHON_LIBRARY}")
get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY)
set(PYTHON_LIBRARY
"${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
endif()
# raise an error if the python libs are still not found.
if(NOT EXISTS "${PYTHON_LIBRARY}")
message(FATAL_ERROR "Python libraries not found")
endif()
else()
if(PYTHON_MULTIARCH)
set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}/${PYTHON_MULTIARCH}" "${PYTHON_LIBDIR}")
else()
set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}")
endif()
#message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}")
# Probably this needs to be more involved. It would be nice if the config
# information the python interpreter itself gave us were more complete.
find_library(PYTHON_LIBRARY
NAMES "python${PYTHON_LIBRARY_SUFFIX}"
PATHS ${_PYTHON_LIBS_SEARCH}
NO_DEFAULT_PATH)
# If all else fails, just set the name/version and let the linker figure out the path.
if(NOT PYTHON_LIBRARY)
set(PYTHON_LIBRARY python${PYTHON_LIBRARY_SUFFIX})
endif()
endif()
MARK_AS_ADVANCED(
PYTHON_LIBRARY
PYTHON_INCLUDE_DIR
)
# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the
# cache entries because they are meant to specify the location of a single
# library. We now set the variables listed by the documentation for this
# module.
SET(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
SET(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}")
find_package_message(PYTHON
"Found PythonLibs: ${PYTHON_LIBRARY}"
"${PYTHON_EXECUTABLE}${PYTHON_VERSION}")
set(PYTHONLIBS_FOUND TRUE)
PROJECT(gpu_nms)
CMAKE_MINIMUM_REQUIRED(VERSION 3.0.2)
# ---------------- User Config ----------------
# Set your Python interpreter if necessary;
# otherwise, a default interpreter will be used.
# Several examples:
# set(PYTHON_EXECUTABLE /usr/bin/python) # Linux & OSX, Builtin Python
# set(PYTHON_EXECUTABLE /X/anaconda/bin/python) # Linux & OSX, Anaconda
# set(PYTHON_EXECUTABLE X:/Anaconda/python) # Win, Anaconda
# Set CUDA compiling architecture
# Remove "compute_70/sm_70" if using CUDA 8.0
set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
-gencode arch=compute_35,code=sm_35
-gencode arch=compute_50,code=sm_50
-gencode arch=compute_60,code=sm_60
-gencode arch=compute_70,code=sm_70)
# ---------------- User Config ----------------
# ---[ Dependencies
include(${PROJECT_SOURCE_DIR}/CMake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/CMake/FindNumPy.cmake)
FIND_PACKAGE(CUDA REQUIRED)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
message(STATUS "C++11 support has been enabled by default.")
# ---[ Config types
set(CMAKE_BUILD_TYPE Release CACHE STRING "set build type to release")
set(CMAKE_CONFIGURATION_TYPES Release CACHE STRING "set build type to release" FORCE)
# ---[ Includes
set(INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
include_directories(${INCLUDE_DIR})
include_directories(${PROJECT_SOURCE_DIR}/src)
include_directories(${PYTHON_INCLUDE_DIRS})
include_directories(${NUMPY_INCLUDE_DIR})
include_directories(${CUDA_INCLUDE_DIRS})
# ---[ libs
link_directories(${PYTHON_LIBRARIES})
# ---[ Install
set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR} CACHE STRING "set install prefix" FORCE)
set(CMAKE_SHARED_LIBRARY_PREFIX "")
# ---[ Flags
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_ARCH}")
if(WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP /O2 /Oi /GL /Ot /Gy")
endif()
if(UNIX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -s -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s -w -fPIC -O3 -m64 -std=c++11")
endif()
# ---[ Files
set(HEADER_FILES gpu_nms.h)
set(SRC_FILES gpu_nms.cpp nms_kernel.cu)
# ---[ Add Target
CUDA_ADD_LIBRARY(${PROJECT_NAME} SHARED ${HEADER_FILES} ${SRC_FILES})
# ---[ Link Libs
TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${CUDA_LIBRARIES} ${CUDA_cublas_LIBRARY} ${CUDA_curand_LIBRARY})
if(WIN32)
TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${PYTHON_LIBRARIES})
endif()
# ---[ Install Target
set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME "gpu_nms")
install (TARGETS ${PROJECT_NAME} DESTINATION ${PROJECT_BINARY_DIR}/../install/lib/nms)
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------
cimport cython
import numpy as np
cimport numpy as np
DTYPE = np.float
ctypedef np.float_t DTYPE_t
@cython.boundscheck(False)
def bbox_overlaps(
np.ndarray[DTYPE_t, ndim=2] boxes,
np.ndarray[DTYPE_t, ndim=2] query_boxes):
"""
Parameters
----------
boxes: (N, 4) ndarray of float
query_boxes: (K, 4) ndarray of float
Returns
-------
overlaps: (N, K) ndarray of overlap between boxes and query_boxes
"""
cdef unsigned int N = boxes.shape[0]
cdef unsigned int K = query_boxes.shape[0]
cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
cdef DTYPE_t iw, ih, box_area
cdef DTYPE_t ua
cdef unsigned int k, n
with nogil:
for k in range(K):
box_area = (
(query_boxes[k, 2] - query_boxes[k, 0] + 1) *
(query_boxes[k, 3] - query_boxes[k, 1] + 1)
)
for n in range(N):
iw = (
min(boxes[n, 2], query_boxes[k, 2]) -
max(boxes[n, 0], query_boxes[k, 0]) + 1
)
if iw > 0:
ih = (
min(boxes[n, 3], query_boxes[k, 3]) -
max(boxes[n, 1], query_boxes[k, 1]) + 1
)
if ih > 0:
ua = float(
(boxes[n, 2] - boxes[n, 0] + 1) *
(boxes[n, 3] - boxes[n, 1] + 1) +
box_area - iw * ih
)
overlaps[n, k] = iw * ih / ua
return overlaps
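For reference, the same overlap matrix can be computed in vectorized NumPy; a sketch like the following is handy for sanity-checking the compiled extension (the `+ 1` offsets follow the integer pixel-coordinate convention used above):
```python
import numpy as np

def bbox_overlaps_ref(boxes, query_boxes):
    """Vectorized NumPy equivalent of the Cython bbox_overlaps."""
    # Areas under the +1 pixel convention: width = x2 - x1 + 1.
    b_area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    q_area = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
             (query_boxes[:, 3] - query_boxes[:, 1] + 1)
    # Pairwise intersection widths and heights, clipped at zero.
    iw = np.minimum(boxes[:, None, 2], query_boxes[None, :, 2]) - \
         np.maximum(boxes[:, None, 0], query_boxes[None, :, 0]) + 1
    ih = np.minimum(boxes[:, None, 3], query_boxes[None, :, 3]) - \
         np.maximum(boxes[:, None, 1], query_boxes[None, :, 1]) + 1
    inter = np.clip(iw, 0, None) * np.clip(ih, 0, None)
    return inter / (b_area[:, None] + q_area[None, :] - inter)  # (N, K)
```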
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
cimport cython
import numpy as np
cimport numpy as np
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
return a if a >= b else b
cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
return a if a <= b else b
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]
cdef int ndets = dets.shape[0]
cdef np.ndarray[np.int_t, ndim=1] suppressed = \
np.zeros((ndets), dtype=np.int)
# nominal indices
cdef int _i, _j
# sorted indices
cdef int i, j
# temp variables for box i's (the box currently under consideration)
cdef np.float32_t ix1, iy1, ix2, iy2, iarea
# variables for computing overlap with box j (lower scoring box)
cdef np.float32_t xx1, yy1, xx2, yy2
cdef np.float32_t w, h
cdef np.float32_t inter, ovr
keep = []
for _i in range(ndets):
i = order[_i]
if suppressed[i] == 1:
continue
keep.append(i)
ix1 = x1[i]
iy1 = y1[i]
ix2 = x2[i]
iy2 = y2[i]
iarea = areas[i]
for _j in range(_i + 1, ndets):
j = order[_j]
if suppressed[j] == 1:
continue
xx1 = max(ix1, x1[j])
yy1 = max(iy1, y1[j])
xx2 = min(ix2, x2[j])
yy2 = min(iy2, y2[j])
w = max(0.0, xx2 - xx1 + 1)
h = max(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (iarea + areas[j] - inter)
if ovr >= thresh:
suppressed[j] = 1
return keep
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float thresh,
unsigned int method=0, float sigma=0.5, float score_thresh=0.001):
cdef unsigned int N = boxes.shape[0]
cdef float iw, ih, box_area
cdef float ua
cdef int pos = 0
cdef float maxscore = 0
cdef int maxpos = 0
cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
for i in range(N):
maxscore = boxes[i, 4]
maxpos = i
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# get max box
while pos < N:
if maxscore < boxes[pos, 4]:
maxscore = boxes[pos, 4]
maxpos = pos
pos = pos + 1
# add max box as a detection
boxes[i,0] = boxes[maxpos,0]
boxes[i,1] = boxes[maxpos,1]
boxes[i,2] = boxes[maxpos,2]
boxes[i,3] = boxes[maxpos,3]
boxes[i,4] = boxes[maxpos,4]
# swap ith box with position of max box
boxes[maxpos,0] = tx1
boxes[maxpos,1] = ty1
boxes[maxpos,2] = tx2
boxes[maxpos,3] = ty2
boxes[maxpos,4] = ts
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# NMS iterations, note that N changes if detection boxes fall below threshold
while pos < N:
x1 = boxes[pos, 0]
y1 = boxes[pos, 1]
x2 = boxes[pos, 2]
y2 = boxes[pos, 3]
s = boxes[pos, 4]
area = (x2 - x1 + 1) * (y2 - y1 + 1)
iw = (min(tx2, x2) - max(tx1, x1) + 1)
if iw > 0:
ih = (min(ty2, y2) - max(ty1, y1) + 1)
if ih > 0:
ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
ov = iw * ih / ua #iou between max box and detection box
if method == 1: # linear
if ov > thresh:
weight = 1 - ov
else:
weight = 1
elif method == 2: # gaussian
weight = np.exp(-(ov * ov) / sigma)
else: # original NMS
if ov > thresh:
weight = 0
else:
weight = 1
boxes[pos, 4] = weight * boxes[pos, 4]
# if box score falls below threshold, discard the box by swapping with last box
# update N
if boxes[pos, 4] < score_thresh:
boxes[pos,0] = boxes[N-1, 0]
boxes[pos,1] = boxes[N-1, 1]
boxes[pos,2] = boxes[N-1, 2]
boxes[pos,3] = boxes[N-1, 3]
boxes[pos,4] = boxes[N-1, 4]
N = N - 1
pos = pos - 1
pos = pos + 1
keep = [i for i in range(N)]
return keep
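`cpu_nms` returns the indices of the boxes it keeps, while `cpu_soft_nms` rescores boxes in place and returns indices into the reordered array. A tiny usage sketch with hypothetical inputs, where each row of `dets` is `[x1, y1, x2, y2, score]`:
```python
import numpy as np
from lib.nms.cpu_nms import cpu_nms  # path after make.sh copies install/lib

dets = np.array([
    [10, 10, 50, 50, 0.9],       # kept: highest score
    [12, 12, 52, 52, 0.8],       # suppressed: IoU with the first box > 0.3
    [100, 100, 150, 150, 0.7],   # kept: no overlap with the others
], dtype=np.float32)
print(cpu_nms(dets, 0.3))  # -> [0, 2]
```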
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
int boxes_dim, float nms_overlap_thresh, int device_id);
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import numpy as np
cimport numpy as np
assert sizeof(int) == sizeof(np.int32_t)
cdef extern from "gpu_nms.h":
void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh, int device_id=0):
cdef int boxes_num = dets.shape[0]
cdef int boxes_dim = dets.shape[1]
cdef int num_out
cdef np.ndarray[np.int32_t, ndim=1] \
keep = np.zeros(boxes_num, dtype=np.int32)
cdef np.ndarray[np.float32_t, ndim=1] \
scores = dets[:, 4]
cdef np.ndarray[np.intp_t, ndim=1] \
order = scores.argsort()[::-1]
cdef np.ndarray[np.float32_t, ndim=2] \
sorted_dets = dets[order, :]
_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
keep = keep[:num_out]
return list(order[keep])
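`gpu_nms` mirrors the `cpu_nms` interface with an extra `device_id`, and its indices refer to the original (unsorted) `dets` because it re-sorts internally. A hypothetical call (the import path depends on where the built module ends up):
```python
import numpy as np
from gpu_nms import gpu_nms  # adjust the import to where the module was built

dets = np.random.rand(100, 5).astype(np.float32)
dets[:, 2:4] += dets[:, 0:2]  # ensure x2 >= x1 and y2 >= y1
keep = gpu_nms(dets, 0.5, device_id=0)  # indices into the original dets
```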
# delete cache
rm -r build install *.c *.cpp
# compile cython modules
python setup.py build_ext --inplace
# compile cuda modules
cd build
cmake .. && make install && cd ..
# setup
cp -r install/lib ../
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#include <vector>
#include "gpu_nms.h"
#define CUDA_CHECK(condition) \
/* Code block avoids redefinition of cudaError_t error */ \
do { \
cudaError_t error = condition; \
if (error != cudaSuccess) { \
\
} \
} while (0)
void SetDevice(int device_id) {
int current_device;
CUDA_CHECK(cudaGetDevice(&current_device));
if (current_device == device_id) return;
CUDA_CHECK(cudaSetDevice(device_id));
}
#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define NMS_BLOCK_SIZE 64
template <typename T>
__device__ T iou(const T* A, const T* B) {
const T x1 = max(A[0], B[0]);
const T y1 = max(A[1], B[1]);
const T x2 = min(A[2], B[2]);
const T y2 = min(A[3], B[3]);
const T width = max((T)0, x2 - x1 + 1);
const T height = max((T)0, y2 - y1 + 1);
const T area = width * height;
const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1);
const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1);
return area / (A_area + B_area - area);
}
template <typename T>
__global__ void nms_mask(const int num_boxes, const T nms_thresh,
const T* boxes, unsigned long long* mask) {
const int i_start = blockIdx.x * NMS_BLOCK_SIZE;
const int di_end = min(num_boxes - i_start, NMS_BLOCK_SIZE);
const int j_start = blockIdx.y * NMS_BLOCK_SIZE;
const int dj_end = min(num_boxes - j_start, NMS_BLOCK_SIZE);
const int num_blocks = DIV_UP(num_boxes, NMS_BLOCK_SIZE);
const int bid = blockIdx.x;
const int tid = threadIdx.x;
__shared__ T boxes_i[NMS_BLOCK_SIZE * 4];
if (tid < di_end) {
boxes_i[tid * 4 + 0] = boxes[(i_start + tid) * 5 + 0];
boxes_i[tid * 4 + 1] = boxes[(i_start + tid) * 5 + 1];
boxes_i[tid * 4 + 2] = boxes[(i_start + tid) * 5 + 2];
boxes_i[tid * 4 + 3] = boxes[(i_start + tid) * 5 + 3];
}
__syncthreads();
if (tid < dj_end) {
const T* const box_j = boxes + (j_start + tid) * 5;
unsigned long long mask_j = 0;
const int di_start = (i_start == j_start) ? (tid + 1) : 0;
for (int di = di_start; di < di_end; ++di)
if (iou(box_j, boxes_i + di * 4) > nms_thresh)
mask_j |= 1ULL << di;
mask[(j_start + tid) * num_blocks + bid] = mask_j;
}
}
template <typename T>
void ApplyNMS(const int num_boxes, const int max_keeps, const float thresh,
const T* boxes, int* keep_indices, int& num_keep) {
const int num_blocks = DIV_UP(num_boxes, NMS_BLOCK_SIZE);
const dim3 blocks(num_blocks, num_blocks);
size_t mask_nbytes = num_boxes * num_blocks * sizeof(unsigned long long);
size_t boxes_nbytes = num_boxes * 5 * sizeof(T);
void* boxes_dev, *mask_dev;
CUDA_CHECK(cudaMalloc(&boxes_dev, boxes_nbytes));
CUDA_CHECK(cudaMalloc(&mask_dev, mask_nbytes));
CUDA_CHECK(cudaMemcpy(boxes_dev, boxes, boxes_nbytes, cudaMemcpyHostToDevice));
nms_mask<T><<<blocks, NMS_BLOCK_SIZE>>>(num_boxes, thresh,
    (T*)boxes_dev,
    (unsigned long long*)mask_dev);
CUDA_CHECK(cudaPeekAtLastError());
std::vector<unsigned long long> mask_host(num_boxes * num_blocks);
CUDA_CHECK(cudaMemcpy(&mask_host[0], mask_dev, mask_nbytes, cudaMemcpyDeviceToHost));
std::vector<unsigned long long> dead_bit(num_blocks);
memset(&dead_bit[0], 0, sizeof(unsigned long long) * num_blocks);
int num_selected = 0;
for (int i = 0; i < num_boxes; ++i) {
const int nblock = i / NMS_BLOCK_SIZE;
const int inblock = i % NMS_BLOCK_SIZE;
if (!(dead_bit[nblock] & (1ULL << inblock))) {
keep_indices[num_selected++] = i;
unsigned long long* mask_i = &mask_host[0] + i * num_blocks;
for (int j = nblock; j < num_blocks; ++j) dead_bit[j] |= mask_i[j];
if (num_selected == max_keeps) break;
}
}
num_keep = num_selected;
CUDA_CHECK(cudaFree(mask_dev));
CUDA_CHECK(cudaFree(boxes_dev));
}
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
int boxes_dim, float nms_overlap_thresh, int device_id) {
// set the device to use
SetDevice(device_id);
// apply gpu nms
ApplyNMS<float>(boxes_num, boxes_num, nms_overlap_thresh,
boxes_host, keep_out, *num_out);
}
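The kernel writes, for each box, one 64-bit word per block of boxes, marking which higher-indexed boxes it overlaps beyond `nms_overlap_thresh`; the host loop then sweeps the score-sorted boxes and ORs each kept box's words into `dead_bit`. The same reduction, sketched in Python for clarity (illustrative only; `mask[i][b]` stands for the word the kernel wrote for box `i` against block `b`):
```python
def reduce_masks(num_boxes, num_blocks, mask, max_keeps):
    """Host-side sweep over the per-block overlap masks."""
    dead_bit = [0] * num_blocks          # suppression bits, one word per block
    keep = []
    for i in range(num_boxes):           # boxes are pre-sorted by score
        nblock, inblock = divmod(i, 64)  # NMS_BLOCK_SIZE == 64
        if not (dead_bit[nblock] >> inblock) & 1:
            keep.append(i)
            for b in range(nblock, num_blocks):
                dead_bit[b] |= mask[i][b]  # suppress higher-indexed overlaps
            if len(keep) == max_keeps:
                break
    return keep
```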
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from distutils.extension import Extension
from distutils.core import setup
from Cython.Distutils import build_ext
import numpy as np
numpy_include = np.get_include()
ext_modules = [
Extension(
"install.lib.utils.cython_bbox",
["bbox.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
include_dirs = [numpy_include]),
Extension(
"install.lib.nms.cpu_nms",
["cpu_nms.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
include_dirs = [numpy_include]),
Extension(
"install.deprecated.gpu_nms",
["gpu_nms.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
language='c++',
include_dirs = [numpy_include]),
Extension(
'install.lib.pycocotools._mask',
['../lib/pycocotools/maskApi.c', '../lib/pycocotools/_mask.pyx'],
include_dirs=[numpy_include, 'pycocotools'],
extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99']),
]
setup(name='Detectron', ext_modules=ext_modules, cmdclass={'build_ext': build_ext})
NUM_GPUS: 8
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: faster_rcnn
BACKBONE: resnet101.fpn
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench',
'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush']
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [60000, 80000]
MAX_ITERS: 90000
SNAPSHOT_ITERS: 5000
SNAPSHOT_PREFIX: coco_faster_rcnn
FRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '../data/imagenet_models/R-101.Affine.pth'
DATABASE: 'taas:/data/coco_2014_trainval35k_lmdb'
IMS_PER_BATCH: 2
USE_DIFF: False # Do not use crowd objects
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
TEST:
DATABASE: 'taas:/data/coco_2014_minival_lmdb'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [800]
MAX_SIZE: 1333
NMS: 0.5
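Under the `steps_with_decay` policy, the base learning rate is scaled down at each iteration listed in `STEPS`. A small sketch of the schedule implied by the config above, assuming the conventional Detectron-style decay factor of 0.1:
```python
def lr_at(iteration, base_lr=0.02, steps=(60000, 80000), gamma=0.1):
    """Learning rate under a steps_with_decay policy."""
    decays = sum(iteration >= s for s in steps)  # how many steps have passed
    return base_lr * gamma ** decays

assert lr_at(0) == 0.02
assert lr_at(60000) == 0.02 * 0.1               # first drop
assert abs(lr_at(80000) - 0.02 * 0.01) < 1e-12  # second drop
```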
NUM_GPUS: 8
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: faster_rcnn
BACKBONE: resnet101.fpn
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench',
'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush']
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [120000, 160000]
MAX_ITERS: 180000
SNAPSHOT_ITERS: 5000
SNAPSHOT_PREFIX: coco_faster_rcnn
FRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '../data/imagenet_models/R-101.Affine.pth'
DATABASE: 'taas:/data/coco_2014_trainval35k_lmdb'
IMS_PER_BATCH: 2
USE_DIFF: False # Do not use crowd objects
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
TEST:
DATABASE: 'taas:/data/coco_2014_minival_lmdb'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [800]
MAX_SIZE: 1333
NMS: 0.5
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: faster_rcnn
BACKBONE: resnet50.fpn
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.002
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [100000, 140000]
MAX_ITERS: 140000
SNAPSHOT_ITERS: 5000
SNAPSHOT_PREFIX: voc_faster_rcnn
FRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '../data/imagenet_models/R-50.Affine.pth'
DATABASE: 'taas:/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH: 2
BATCH_SIZE: 128
SCALES: [600]
MAX_SIZE: 1000
TEST:
DATABASE: 'taas:/data/voc_2007_test_lmdb'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [600]
MAX_SIZE: 1000
NMS: 0.45
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: faster_rcnn
BACKBONE: vgg16.c4
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.001
WEIGHT_DECAY: 0.0005
LR_POLICY: steps_with_decay
STEPS: [100000, 140000]
MAX_ITERS: 140000
SNAPSHOT_ITERS: 5000
SNAPSHOT_PREFIX: voc_faster_rcnn
RPN:
STRIDES: [16]
SCALES: [8, 16, 32] # RField: [128, 256, 512]
ASPECT_RATIOS: [0.5, 1.0, 2.0]
FRCNN:
ROI_XFORM_METHOD: RoIPool
ROI_XFORM_RESOLUTION: 7
MLP_HEAD_DIM: 4096
TRAIN:
WEIGHTS: '../data/imagenet_models/VGG16.RCNN.pth'
DATABASE: 'taas:/data/voc_0712_trainval_lmdb'
RPN_MIN_SIZE: 16
IMS_PER_BATCH: 2
BATCH_SIZE: 128
SCALES: [600]
MAX_SIZE: 1000
TEST:
DATABASE: 'taas:/data/voc_2007_test_lmdb'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_MIN_SIZE: 16
RPN_POST_NMS_TOP_N: 300
SCALES: [600]
MAX_SIZE: 1000
NMS: 0.45
NUM_GPUS: 4
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: retinanet
BACKBONE: resnet50.fpn
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench',
'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush']
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [30000, 40000]
MAX_ITERS: 45000
SNAPSHOT_ITERS: 5000
SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '../data/imagenet_models/R-50.Affine.pth'
DATABASE: 'taas:/data/coco_2014_trainval35k_lmdb'
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
TEST:
DATABASE: 'taas:/data/coco_2014_minival_lmdb'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
SCALES: [400]
MAX_SIZE: 666
NMS: 0.5
NUM_GPUS: 4
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: retinanet
BACKBONE: resnet50.fpn
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench',
'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush']
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [120000, 160000]
MAX_ITERS: 180000
SNAPSHOT_ITERS: 5000
SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
DROPBLOCK:
DROP_ON: True
DECREMENT: 0.000005 # * 20000 = 0.1
TRAIN:
WEIGHTS: '../data/imagenet_models/R-50.Affine.pth'
DATABASE: 'taas:/data/coco_2014_trainval35k_lmdb'
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
SCALE_JITTERING: True
COLOR_JITTERING: True
SCALE_RANGE: [0.8, 1.2]
TEST:
DATABASE: 'taas:/data/coco_2014_minival_lmdb'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
SCALES: [400]
MAX_SIZE: 666
NMS: 0.5
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: ssd
BACKBONE: airnet5b.mbox
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.001
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [80000, 100000, 120000]
MAX_ITERS: 120000
SNAPSHOT_ITERS: 5000
SNAPSHOT_PREFIX: voc_ssd_300
SSD:
RESIZE:
HEIGHT: 300
WIDTH: 300
MULTIBOX:
MIN_SIZES: [30, 90, 150]
MAX_SIZES: [90, 150, 210]
STRIDES: [8, 16, 32]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
WEIGHTS: '../data/imagenet_models/AirNet.SSD.pth'
DATABASE: 'taas:/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH: 32
TEST:
DATABASE: 'taas:/data/voc_2007_test_lmdb'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
NMS_TOP_K: 400
NMS: 0.45
SCORE_THRESH: 0.01
DETECTIONS_PER_IM: 200
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: ssd
BACKBONE: vgg16_reduced_300.mbox
FREEZE_AT: 0
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.002
WARM_UP_FACTOR: 0.
WEIGHT_DECAY: 0.0005
LR_POLICY: steps_with_decay
STEPS: [80000, 100000, 120000]
MAX_ITERS: 120000
SNAPSHOT_ITERS: 5000
SNAPSHOT_PREFIX: voc_ssd_300
SSD:
RESIZE:
HEIGHT: 300
WIDTH: 300
MULTIBOX:
STRIDES: [8, 16, 32, 64, 100, 300]
MIN_SIZES: [30, 60, 110, 162, 213, 264]
MAX_SIZES: [60, 110, 162, 213, 264, 315]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5, 3, 0.33], [1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
WEIGHTS: '../data/imagenet_models/VGG16.SSD.pth'
DATABASE: 'taas:/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH: 32
TEST:
DATABASE: 'taas:/data/voc_2007_test_lmdb'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
NMS_TOP_K: 400
NMS: 0.45
SCORE_THRESH: 0.01
DETECTIONS_PER_IM: 200
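Following the SSD paper [3], each `MIN_SIZES[k]`/`MAX_SIZES[k]` pair yields a square default box of side `min_size`, an extra square of side `sqrt(min_size * max_size)`, and one box per non-unit aspect ratio with `w = min_size * sqrt(ar)` and `h = min_size / sqrt(ar)`. A sketch of the shapes implied by the config above (this mirrors the paper's recipe; the repository's own anchor code may differ in details):
```python
import math

def default_box_shapes(min_size, max_size, aspect_ratios):
    """(width, height) pairs for one SSD feature level, per the SSD paper."""
    shapes = [(min_size, min_size),                   # ar = 1 square
              (math.sqrt(min_size * max_size),) * 2]  # extra sqrt-scale square
    for ar in aspect_ratios:
        if ar == 1:
            continue  # already covered by the first square
        shapes.append((min_size * math.sqrt(ar), min_size / math.sqrt(ar)))
    return shapes

# First feature level of the VGG16 SSD-300 config: stride 8, sizes 30/60.
print(default_box_shapes(30, 60, [1, 2, 0.5]))
```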
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# --------------------------------------------------------
# Detectron @ Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os.path as osp
sys.path.insert(0, '../../../')
from database.frcnn.utils.make_from_xml import make_db
if __name__ == '__main__':
VOC_ROOT_DIR = '/home/workspace/datasets/VOC'
# train database: voc_2007_trainval + voc_2012_trainval
make_db(database_file=osp.join(VOC_ROOT_DIR, 'cache/voc_0712_trainval_lmdb'),
images_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/JPEGImages'),
osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/JPEGImages')],
annotations_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/Annotations'),
osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/Annotations')],
imagesets_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
splits=['trainval', 'trainval'])
# test database: voc_2007_test
make_db(database_file=osp.join(VOC_ROOT_DIR, 'cache/voc_2007_test_lmdb'),
images_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/JPEGImages'),
annotations_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/Annotations'),
imagesets_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits=['test'])
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
syntax = "proto2";
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
optional int32 width = 3;
optional bytes data = 4;
optional int32 label = 5;
repeated float float_data = 6;
optional bool encoded = 7 [default = false];
}
message Annotation {
optional float x1 = 1;
optional float y1 = 2;
optional float x2 = 3;
optional float y2 = 4;
optional string name = 5;
optional bool difficult = 6 [default = false];
optional bool crowd = 7 [default = false];
optional string mask = 8;
}
message AnnotatedDatum {
optional Datum datum = 1;
optional string filename = 2;
repeated Annotation annotation = 3;
}
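After compiling the schema with `protoc --python_out=. anno.proto`, records can be built and serialized directly; a minimal sketch:
```python
import anno_pb2 as pb

datum = pb.AnnotatedDatum()
datum.filename = '000001'
anno = datum.annotation.add()
anno.x1, anno.y1, anno.x2, anno.y2 = 48.0, 240.0, 195.0, 371.0
anno.name = 'dog'
anno.difficult = False
blob = datum.SerializeToString()  # bytes to store in LMDB
restored = pb.AnnotatedDatum.FromString(blob)
assert restored.annotation[0].name == 'dog'
```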
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: anno.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='anno.proto',
package='',
serialized_pb=_b('\n\nanno.proto\"\x81\x01\n\x05\x44\x61tum\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\r\n\x05width\x18\x03 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\x0c\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x12\n\nfloat_data\x18\x06 \x03(\x02\x12\x16\n\x07\x65ncoded\x18\x07 \x01(\x08:\x05\x66\x61lse\"\x88\x01\n\nAnnotation\x12\n\n\x02x1\x18\x01 \x01(\x02\x12\n\n\x02y1\x18\x02 \x01(\x02\x12\n\n\x02x2\x18\x03 \x01(\x02\x12\n\n\x02y2\x18\x04 \x01(\x02\x12\x0c\n\x04name\x18\x05 \x01(\t\x12\x18\n\tdifficult\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x14\n\x05\x63rowd\x18\x07 \x01(\x08:\x05\x66\x61lse\x12\x0c\n\x04mask\x18\x08 \x01(\t\"Z\n\x0e\x41nnotatedDatum\x12\x15\n\x05\x64\x61tum\x18\x01 \x01(\x0b\x32\x06.Datum\x12\x10\n\x08\x66ilename\x18\x02 \x01(\t\x12\x1f\n\nannotation\x18\x03 \x03(\x0b\x32\x0b.Annotation')
)
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
_DATUM = _descriptor.Descriptor(
name='Datum',
full_name='Datum',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='channels', full_name='Datum.channels', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='height', full_name='Datum.height', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='width', full_name='Datum.width', index=2,
number=3, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='data', full_name='Datum.data', index=3,
number=4, type=12, cpp_type=9, label=1,
has_default_value=False, default_value=_b(""),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='label', full_name='Datum.label', index=4,
number=5, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='float_data', full_name='Datum.float_data', index=5,
number=6, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='encoded', full_name='Datum.encoded', index=6,
number=7, type=8, cpp_type=7, label=1,
has_default_value=True, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=15,
serialized_end=144,
)
_ANNOTATION = _descriptor.Descriptor(
name='Annotation',
full_name='Annotation',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='x1', full_name='Annotation.x1', index=0,
number=1, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='y1', full_name='Annotation.y1', index=1,
number=2, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='x2', full_name='Annotation.x2', index=2,
number=3, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='y2', full_name='Annotation.y2', index=3,
number=4, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='name', full_name='Annotation.name', index=4,
number=5, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='difficult', full_name='Annotation.difficult', index=5,
number=6, type=8, cpp_type=7, label=1,
has_default_value=True, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='crowd', full_name='Annotation.crowd', index=6,
number=7, type=8, cpp_type=7, label=1,
has_default_value=True, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='mask', full_name='Annotation.mask', index=7,
number=8, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=147,
serialized_end=283,
)
_ANNOTATEDDATUM = _descriptor.Descriptor(
name='AnnotatedDatum',
full_name='AnnotatedDatum',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='datum', full_name='AnnotatedDatum.datum', index=0,
number=1, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='filename', full_name='AnnotatedDatum.filename', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='annotation', full_name='AnnotatedDatum.annotation', index=2,
number=3, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=285,
serialized_end=375,
)
_ANNOTATEDDATUM.fields_by_name['datum'].message_type = _DATUM
_ANNOTATEDDATUM.fields_by_name['annotation'].message_type = _ANNOTATION
DESCRIPTOR.message_types_by_name['Datum'] = _DATUM
DESCRIPTOR.message_types_by_name['Annotation'] = _ANNOTATION
DESCRIPTOR.message_types_by_name['AnnotatedDatum'] = _ANNOTATEDDATUM
Datum = _reflection.GeneratedProtocolMessageType('Datum', (_message.Message,), dict(
DESCRIPTOR = _DATUM,
__module__ = 'anno_pb2'
# @@protoc_insertion_point(class_scope:Datum)
))
_sym_db.RegisterMessage(Datum)
Annotation = _reflection.GeneratedProtocolMessageType('Annotation', (_message.Message,), dict(
DESCRIPTOR = _ANNOTATION,
__module__ = 'anno_pb2'
# @@protoc_insertion_point(class_scope:Annotation)
))
_sym_db.RegisterMessage(Annotation)
AnnotatedDatum = _reflection.GeneratedProtocolMessageType('AnnotatedDatum', (_message.Message,), dict(
DESCRIPTOR = _ANNOTATEDDATUM,
__module__ = 'anno_pb2'
# @@protoc_insertion_point(class_scope:AnnotatedDatum)
))
_sym_db.RegisterMessage(AnnotatedDatum)
# @@protoc_insertion_point(module_scope)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import cv2
from . import anno_pb2 as pb
from dragon.tools.db import LMDB
ZFILL = 8
ENCODE_QUALITY = 95
def set_zfill(value):
global ZFILL
ZFILL = value
def set_quality(value):
global ENCODE_QUALITY
ENCODE_QUALITY = value
def make_datum(image_id, image_file, objects):
anno_datum = pb.AnnotatedDatum()
datum = pb.Datum()
im = cv2.imread(image_file)
datum.height, datum.width, datum.channels = im.shape
datum.encoded = ENCODE_QUALITY != 100
if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
datum.data = im.tostring()
anno_datum.datum.CopyFrom(datum)
anno_datum.filename = image_id
for ix, obj in enumerate(objects):
anno = pb.Annotation()
anno.x1, anno.y1, anno.x2, anno.y2 = obj['bbox']
anno.name = obj['name']
anno.difficult = obj['difficult']
anno_datum.annotation.add().CopyFrom(anno)
return anno_datum
def make_db(database_file, images_path, gt_recs, ext='.png'):
if os.path.isdir(database_file) is True:
raise ValueError('The database path already exists.')
else:
root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir):
os.makedirs(root_dir)
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=10000)
db.open(database_file, mode='w')
count = 0
total_line = len(gt_recs)
start_time = time.time()
zfill_flag = '{0:0%d}' % (ZFILL)
for image_id, objects in gt_recs.items():
count += 1
if count % 10000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
image_file = os.path.join(images_path, image_id + ext)
datum = make_datum(image_id, image_file, objects)
db.put(zfill_flag.format(count - 1), datum.SerializeToString())
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
db.commit()
db.close()
# Compress the empty space
db.open(database_file, mode='w')
db.commit()
end_time = time.time()
print('{0} images have been stored in the database.'.format(total_line))
print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
print('The size of database is {0} MB.'.format(
float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
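`make_db` expects `gt_recs` to map an image id to its list of objects, each carrying the keys read by `make_datum` above; a hypothetical call:
```python
# Hypothetical ground-truth records; keys follow what make_datum reads.
gt_recs = {
    '000001': [{'bbox': (48.0, 240.0, 195.0, 371.0),
                'name': 'dog', 'difficult': False}],
}
make_db(database_file='/data/cache/my_lmdb',
        images_path='/data/images',  # holds 000001.jpg, etc.
        gt_recs=gt_recs,
        ext='.jpg')
```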
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import cv2
import xml.etree.ElementTree as ET
from dragon.tools.db import LMDB
from . import anno_pb2 as pb
ZFILL = 8
ENCODE_QUALITY = 95
def set_zfill(value):
global ZFILL
ZFILL = value
def set_quality(value):
global ENCODE_QUALITY
ENCODE_QUALITY = value
def make_datum(image_file, xml_file):
tree = ET.parse(xml_file)
filename = os.path.split(xml_file)[-1]
objs = tree.findall('object')
anno_datum = pb.AnnotatedDatum()
datum = pb.Datum()
im = cv2.imread(image_file)
datum.height, datum.width, datum.channels = im.shape
datum.encoded = ENCODE_QUALITY != 100
if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
datum.data = im.tostring()
anno_datum.datum.CopyFrom(datum)
anno_datum.filename = filename.split('.')[0]
for ix, obj in enumerate(objs):
anno = pb.Annotation()
bbox = obj.find('bndbox')
x1 = float(bbox.find('xmin').text)
y1 = float(bbox.find('ymin').text)
x2 = float(bbox.find('xmax').text)
y2 = float(bbox.find('ymax').text)
cls = obj.find('name').text.strip()
anno.x1, anno.y1, anno.x2, anno.y2 = (x1, y1, x2, y2)
anno.name = cls
anno.difficult = False
if obj.find('difficult') is not None:
anno.difficult = int(obj.find('difficult').text) == 1
anno_datum.annotation.add().CopyFrom(anno)
return anno_datum
def make_db(database_file,
images_path,
annotations_path,
imagesets_path,
splits):
if os.path.isdir(database_file) is True:
raise ValueError('The database path already exists.')
else:
root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir):
os.makedirs(root_dir)
if not isinstance(images_path, list):
images_path = [images_path]
if not isinstance(annotations_path, list):
annotations_path = [annotations_path]
if not isinstance(imagesets_path, list):
imagesets_path = [imagesets_path]
assert len(splits) == len(imagesets_path)
assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path)
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=10000)
db.open(database_file, mode='w')
count = 0
total_line = 0
start_time = time.time()
zfill_flag = '{0:0%d}' % (ZFILL)
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 10000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
datum = make_datum(image_file, xml_file)
db.put(zfill_flag.format(count - 1), datum.SerializeToString())
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
db.commit()
db.close()
# Compress the empty space
db.open(database_file, mode='w')
db.commit()
end_time = time.time()
print('{0} images have been stored in the database.'.format(total_line))
print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
print('The size of database is {0} MB.'.format(
float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
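# A minimal usage sketch for a VOC-style layout (all paths below are
# assumptions for illustration):
#   make_db(database_file='/data/voc/cache/voc_2007_trainval_lmdb',
#           images_path='/data/VOCdevkit2007/VOC2007/JPEGImages',
#           annotations_path='/data/VOCdevkit2007/VOC2007/Annotations',
#           imagesets_path='/data/VOCdevkit2007/VOC2007/ImageSets/Main',
#           splits=['trainval'])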
\ No newline at end of file
# --------------------------------------------------------
# Detectron @ Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Make LMDB for cityscape dataset."""
import os
import sys
import shutil
import numpy as np
np.random.seed(1337)
try:
    import cPickle
except ImportError:
    import pickle as cPickle
sys.path.insert(0, '../../../')
from database.mrcnn.utils.make import make_db
from database.mrcnn.cityscape.make_mask import make_mask
if __name__ == '__main__':
CITYSCAPE_ROOT = '/data/cityscape'
# make RLE masks
if not os.path.exists('build'): os.makedirs('build')
cs_train = make_mask(
os.path.join(CITYSCAPE_ROOT, 'gtFine_trainvaltest'),
os.path.join(CITYSCAPE_ROOT, 'gtFine_trainvaltest/imglists/train.lst'))
cs_val = make_mask(
os.path.join(CITYSCAPE_ROOT, 'gtFine_trainvaltest'),
os.path.join(CITYSCAPE_ROOT, 'gtFine_trainvaltest/imglists/val.lst'))
with open('build/cs_train_mask.pkl', 'wb') as f:
cPickle.dump(cs_train, f, cPickle.HIGHEST_PROTOCOL)
with open('build/cs_val_mask.pkl', 'wb') as f:
cPickle.dump(cs_val, f, cPickle.HIGHEST_PROTOCOL)
# make image splits
for split in ['train', 'val', 'test']:
with open(os.path.join(CITYSCAPE_ROOT,
'gtFine_trainvaltest/imglists', split + '.lst'), 'r') as f:
entries = [line.strip().split('\t') for line in f.readlines()]
if split == 'train': np.random.shuffle(entries)
with open(os.path.join(CITYSCAPE_ROOT,
'gtFine_trainvaltest/imglists', split + '.txt'), 'w') as w:
for entry in entries: w.write(entry[1].split('.')[0] + '\n')
# make database
make_db(database_file=os.path.join(CITYSCAPE_ROOT, 'cache/cs_train_lmdb'),
images_path=os.path.join(CITYSCAPE_ROOT, 'leftImg8bit_trainvaltest'),
mask_file='build/cs_train_mask.pkl',
splits_path=os.path.join(CITYSCAPE_ROOT, 'gtFine_trainvaltest/imglists'),
splits=['train'], ext='.png')
make_db(database_file=os.path.join(CITYSCAPE_ROOT, 'cache/cs_val_lmdb'),
images_path=os.path.join(CITYSCAPE_ROOT, 'leftImg8bit_trainvaltest'),
mask_file='build/cs_val_mask.pkl',
splits_path=os.path.join(CITYSCAPE_ROOT, 'gtFine_trainvaltest/imglists'),
splits=['val'], ext='.png')
# clean!
shutil.rmtree('build')
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Make masks for cityscape dataset."""
import os
import sys
import cv2
from collections import OrderedDict
import PIL.Image as Image
import numpy as np
np.random.seed(1337)
sys.path.insert(0, '../../..')
from lib.pycocotools.mask_utils import mask_bin2rle
from database.mrcnn.utils.process_pool import ProcessPool
class_id = [0,
24, 25, 26, 27,
28, 31, 32, 33]
classes = ['__background__',
'person', 'rider', 'car', 'truck',
'bus', 'train', 'motorcycle', 'bicycle']
ind_to_class = dict(zip(range(len(classes)), classes))
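# In the Cityscapes ``instanceIds`` images, pixels of instance-level classes
# are encoded as (class_id * 1000 + instance_index), while stuff pixels carry
# the bare class id. parse_gt() below recovers one binary mask per instance
# by scanning each class range [class_id * 1000, (class_id + 1) * 1000).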
def parse_gt(gt_file, im_scale=1.0):
im = Image.open(gt_file)
pixel = list(im.getdata())
pixel = np.array(pixel).reshape([im.size[1], im.size[0]])
objects = []
for c in range(1, len(class_id)):
px = np.where((pixel >= class_id[c] * 1000) & (pixel < (class_id[c] + 1) * 1000))
if len(px[0]) == 0: continue
uids = np.unique(pixel[px])
for idx, uid in enumerate(uids):
px = np.where(pixel == uid)
x1 = np.min(px[1])
y1 = np.min(px[0])
x2 = np.max(px[1])
y2 = np.max(px[0])
if x2 - x1 <= 1 or y2 - y1 <= 1: continue
mask = np.zeros([im.size[1], im.size[0]], dtype=np.uint8)
mask[px] = 1
if im_scale != 1:
mask = cv2.resize(mask, None, fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_NEAREST)
x1 = min(int(x1 * im_scale), mask.shape[1])
y1 = min(int(y1 * im_scale), mask.shape[0])
x2 = min(int(x2 * im_scale), mask.shape[1])
y2 = min(int(y2 * im_scale), mask.shape[0])
objects.append({'bbox': [x1, y1, x2, y2],
'mask': mask_bin2rle([mask])[0],
'name': ind_to_class[c],
'difficult': False})
return objects
def map_func(gts, Q):
for image_id, gt_file in gts:
objects = parse_gt(gt_file)
Q.put((image_id, objects))
def make_mask(gt_root, split_file):
# Create tasks
gt_tasks, gt_recs = [], OrderedDict()
with open(split_file, 'r') as f:
for line in f:
_, image_path, gt_path = line.strip().split('\t')
image_id = image_path.split('.')[0]
gt_file = os.path.join(gt_root, gt_path.replace('labelTrainIds', 'instanceIds'))
gt_tasks.append((image_id, gt_file))
num_tasks = len(gt_tasks)
# Run!
with ProcessPool(16) as pool:
pool.run(gt_tasks, func=map_func)
for idx in range(num_tasks):
image_id, objects = pool.get()
gt_recs[image_id] = objects
print('\rProcess: {} / {}'.format(idx + 1, num_tasks), end='')
return gt_recs
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import json
import cv2
from collections import defaultdict
from lib.pycocotools.mask_utils import mask_rle2im
CITYSCAPE_ROOT = '/data/cityscape'
def write_results(json_file, img_list):
with open(json_file, 'r') as f:
json_results = json.load(f)
class_id = [0, 24, 25, 26, 27, 28, 31, 32, 33]
category_id_to_class_id = dict(zip(range(9), class_id))
result_path = os.path.join(CITYSCAPE_ROOT, 'gtFine_trainvaltest', 'results', 'pred')
if not os.path.exists(result_path): os.makedirs(result_path)
counts = defaultdict(int)
txt_results = defaultdict(list)
for idx, rec in enumerate(json_results):
        cls_id = category_id_to_class_id[rec['category_id']]
        if cls_id == 0: continue
im_h, im_w = rec['segmentation']['size']
mask_rle = rec['segmentation']['counts']
mask_image = mask_rle2im([mask_rle], im_h, im_w)[0] * 200
image_name = rec['image_id'].split('_leftImg8bit')[0]
mask_name = image_name + '_' + str(counts[image_name]) + '.png'
counts[image_name] += 1
mask_path = os.path.join(result_path, mask_name)
cv2.imwrite(mask_path, mask_image)
        txt_results[image_name].append((mask_name, cls_id, rec['score']))
print('\rWriting masks ({} / {})'.format(idx + 1, len(json_results)), end='')
with open(img_list, 'r') as F:
for line in F.readlines():
image_name = line.strip().split('/')[-1].split('_leftImg8bit')[0]
txt_path = os.path.join(result_path, image_name + '.txt')
with open(txt_path, 'w') as f:
for rec in txt_results[image_name]:
f.write('{} {} {:.8f}\n'.format(rec[0], rec[1], rec[2]))
if __name__ == '__main__':
write_results(
'/results/segmentations.json',
os.path.join(CITYSCAPE_ROOT, 'gtFine_trainvaltest', 'imglists', 'val.txt')
)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Make LMDB for COCO dataset."""
import os
import sys
import shutil
sys.path.insert(0, '../../../')
from database.mrcnn.utils.make import make_db
from database.mrcnn.coco.make_mask import make_mask, merge_mask
if __name__ == '__main__':
COCO_ROOT = '/data/coco'
# make RLE masks
if not os.path.exists('build'): os.makedirs('build')
make_mask('train', '2014', COCO_ROOT)
make_mask('valminusminival', '2014', COCO_ROOT)
make_mask('minival', '2014', COCO_ROOT)
merge_mask('trainval35k', '2014', [
'build/coco_2014_train_mask.pkl',
'build/coco_2014_valminusminival_mask.pkl'])
# train database: coco_2014_trainval35k
make_db(database_file=os.path.join(COCO_ROOT, 'cache/coco_2014_trainval35k_lmdb'),
images_path=[os.path.join(COCO_ROOT, 'images/train2014'),
os.path.join(COCO_ROOT, 'images/val2014')],
splits_path=[os.path.join(COCO_ROOT, 'ImageSets'),
os.path.join(COCO_ROOT, 'ImageSets')],
mask_file='build/coco_2014_trainval35k_mask.pkl',
splits=['train', 'valminusminival'])
# val database: coco_2014_minival
make_db(database_file=os.path.join(COCO_ROOT, 'cache/coco_2014_minival_lmdb'),
images_path=os.path.join(COCO_ROOT, 'images/val2014'),
mask_file='build/coco_2014_minival_mask.pkl',
splits_path=os.path.join(COCO_ROOT, 'ImageSets'),
splits=['minival'])
# clean!
shutil.rmtree('build')
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import os
import sys
import os.path as osp
from collections import OrderedDict
try:
    import cPickle
except ImportError:
    import pickle as cPickle
sys.path.insert(0, '../../..')
from lib.pycocotools.coco import COCO
from lib.pycocotools.mask_utils import mask_poly2rle
class imdb(object):
def __init__(self, image_set, year, data_dir):
self._year = year
self._image_set = image_set
self._data_path = osp.join(data_dir)
self.invalid_cnt = 0
self.ignore_cnt = 0
#################
# CLASSES #
#################
# load COCO API, classes, class <-> id mappings
self._COCO = COCO(self._get_ann_file())
cats = self._COCO.loadCats(self._COCO.getCatIds())
self._classes = tuple(['__background__'] + [c['name'] for c in cats])
self._class_to_ind = dict(zip(self._classes, range(self.num_classes)))
self._ind_to_class = dict(zip(range(self.num_classes), self._classes))
self._class_to_coco_cat_id = dict(zip([c['name'] for c in cats],
self._COCO.getCatIds()))
self._coco_cat_id_to_class_ind = dict([(self._class_to_coco_cat_id[cls],
self._class_to_ind[cls]) for cls in self._classes[1:]])
#################
# SET #
#################
self._view_map = {
'minival2014': 'val2014', # 5k val2014 subset
'valminusminival2014': 'val2014', # val2014 \setminus minival2014
}
coco_name = image_set + year # e.g., "val2014"
self._data_name = (self._view_map[coco_name]
if coco_name in self._view_map else coco_name)
#################
# IMAGES #
#################
self._image_index = self._load_image_set_index()
self._annotations = self._load_annotations()
def _get_ann_file(self):
prefix = 'instances' if self._image_set.find('test') == -1 \
else 'image_info'
return osp.join(self._data_path, 'annotations',
prefix + '_' + self._image_set + self._year + '.json')
def _load_image_set_index(self):
"""
Load image ids.
"""
image_ids = self._COCO.getImgIds()
return image_ids
def _load_annotations(self):
"""
Load annotations.
"""
annotations = [self._load_coco_annotation(index)
for index in self._image_index]
return annotations
def image_path_from_index(self, index):
"""
Construct an image path from the image's "index" identifier.
"""
# Example image path for index=119993:
# images/train2014/COCO_train2014_000000119993.jpg
file_name = ('COCO_' + self._data_name + '_' +
str(index).zfill(12) + '.jpg')
image_path = osp.join(self._data_path, 'images',
self._data_name, file_name)
assert osp.exists(image_path), \
'Path does not exist: {}'.format(image_path)
return image_path
def image_path_at(self, i):
"""
Return the absolute path to image i in the image sequence.
"""
return self.image_path_from_index(self._image_index[i])
def annotation_at(self, i):
"""
        Return the annotation of image i in the image sequence.
"""
return self._annotations[i]
def _load_coco_annotation(self, index):
"""
Loads COCO bounding-box instance annotations. Crowd instances are
handled by marking their overlaps (with all categories) to -1. This
overlap value means that crowd "instances" are excluded from training.
"""
im_ann = self._COCO.loadImgs(index)[0]
width = im_ann['width']
height = im_ann['height']
annIds = self._COCO.getAnnIds(imgIds=index, iscrowd=None)
objs = self._COCO.loadAnns(annIds)
# Sanitize boxes -- some are invalid
valid_objs = []
for obj in objs:
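            # COCO boxes are stored as [x, y, width, height];
            # convert them to a clipped [x1, y1, x2, y2] here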
x1 = int(max(0, obj['bbox'][0]))
y1 = int(max(0, obj['bbox'][1]))
x2 = int(min(width - 1, x1 + max(0, obj['bbox'][2] - 1)))
y2 = int(min(height - 1, y1 + max(0, obj['bbox'][3] - 1)))
            if isinstance(obj['segmentation'], list):
for p in obj['segmentation']:
if len(p) < 6: print('Remove invalid segm.')
# Valid polygons have >= 3 points, so require >= 6 coordinates
obj['segmentation'] = [p for p in obj['segmentation'] if len(p) >= 6]
rle_masks = mask_poly2rle([obj['segmentation']], height, width)
else:
# crowd masks
rle_masks = [obj['segmentation']]
if obj['area'] > 0 and x2 > x1 and y2 > y1:
obj['clean_bbox'] = [x1, y1, x2, y2]
# Exclude the crowd masks
# TODO(PhyscalX): You may encounter crashes when decoding crowd masks.
mask = rle_masks[0] if not obj['iscrowd'] else ''
valid_objs.append(
{'bbox': [x1, y1, x2, y2],
'mask': mask,
'category_id': obj['category_id'],
'class_id': self._coco_cat_id_to_class_ind[obj['category_id']],
'crowd': obj['iscrowd']})
valid_objs[-1]['name'] = self._ind_to_class[valid_objs[-1]['class_id']]
return height, width, valid_objs
@property
def num_images(self):
return len(self._image_index)
@property
def num_classes(self):
return len(self._classes)
def make_mask(split, year, data_dir):
coco = imdb(split, year, data_dir)
print('Preparing to make split: {}, total {} images'.format(split, coco.num_images))
if not osp.exists(osp.join(coco._data_path, 'ImageSets')):
os.makedirs(osp.join(coco._data_path, 'ImageSets'))
gt_recs = OrderedDict()
for i in range(coco.num_images):
filename = (coco.image_path_at(i).split('/')[-1]).split('.')[0]
h, w, objects = coco.annotation_at(i)
gt_recs[filename] = objects
with open(osp.join('build',
'coco_' + year + '_' + split + '_mask.pkl'), 'wb') as f:
cPickle.dump(gt_recs, f, cPickle.HIGHEST_PROTOCOL)
with open(osp.join(coco._data_path, 'ImageSets', split + '.txt'), 'w') as f:
for i in range(coco.num_images):
filename = (coco.image_path_at(i).split('/')[-1]).split('.')[0]
if i != coco.num_images - 1: filename += '\n'
f.write(filename)
def merge_mask(split, year, mask_files):
gt_recs = OrderedDict()
data_path = os.path.dirname(mask_files[0])
for mask_file in mask_files:
with open(mask_file, 'rb') as f:
recs = cPickle.load(f)
gt_recs.update(recs)
with open(osp.join(data_path,
'coco_' + year + '_' + split + '_mask.pkl'), 'wb') as f:
cPickle.dump(gt_recs, f, cPickle.HIGHEST_PROTOCOL)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import os
import sys
import time
import json
import cv2
from dragon.tools.db import LMDB
sys.path.insert(0, '../../../')
import database.mrcnn.utils.anno_pb2 as pb
IMAGE_INFO = '/data/image_info_test-dev2017.json'
def load_image_list(image_info):
num_images = len(image_info['images'])
image_list = []
print('The split has {} images.'.format(num_images))
for image in image_info['images']:
image_list.append(image['file_name'])
return image_list
def make_datum(image_file):
anno_datum = pb.AnnotatedDatum()
datum = pb.Datum()
im = cv2.imread(image_file)
datum.height, datum.width, datum.channels = im.shape
datum.encoded = True
if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
datum.data = im.tostring()
anno_datum.datum.CopyFrom(datum)
anno_datum.filename = os.path.split(image_file)[-1]
return anno_datum
def make_db(database_file, images_path, image_list):
    if os.path.isdir(database_file):
        raise ValueError('The database path already exists.')
else:
root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir):
os.makedirs(root_dir)
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=10000)
db.open(database_file, mode='w')
count = 0
start_time = time.time()
zfill_flag = '{0:0%d}' % (8)
for image_file in image_list:
count += 1
if count % 10000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, len(image_list), now_time - start_time))
db.commit()
datum = make_datum(os.path.join(images_path, image_file))
db.put(zfill_flag.format(count - 1), datum.SerializeToString())
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, len(image_list), now_time - start_time))
db.commit()
db.close()
end_time = time.time()
print('{0} images have been stored in the database.'.format(len(image_list)))
print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
print('The size of database is {0} MB.'.format(
float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
if __name__ == '__main__':
    with open(IMAGE_INFO, 'r') as f:
        image_info = json.load(f)
image_list = load_image_list(image_info)
make_db('/data/coco_2017_test-dev_lmdb',
'/data/test2017', image_list)
\ No newline at end of file
# --------------------------------------------------------
# FPN @ Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
from .make import set_zfill, set_quality, make_db
\ No newline at end of file
syntax = "proto2";
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
optional int32 width = 3;
optional bytes data = 4;
optional int32 label = 5;
repeated float float_data = 6;
optional bool encoded = 7 [default = false];
repeated int32 labels = 8;
}
message Annotation {
optional float x1 = 1;
optional float y1 = 2;
optional float x2 = 3;
optional float y2 = 4;
optional string name = 5;
optional bool difficult = 6 [default = false];
optional string mask = 7;
}
message AnnotatedDatum {
optional Datum datum = 1;
optional string filename = 2;
repeated Annotation annotation = 3;
}
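// A minimal sketch of writing one record with these messages from Python
// (the field values below are illustrative assumptions):
//
//   import anno_pb2 as pb
//   datum = pb.Datum(height=480, width=640, channels=3, encoded=True)
//   record = pb.AnnotatedDatum(datum=datum, filename='000001')
//   anno = record.annotation.add()
//   anno.x1, anno.y1, anno.x2, anno.y2 = 10., 20., 110., 220.
//   anno.name = 'person'
//   serialized = record.SerializeToString()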
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: anno.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='anno.proto',
package='',
serialized_pb=_b('\n\nanno.proto\"\x91\x01\n\x05\x44\x61tum\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\r\n\x05width\x18\x03 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\x0c\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x12\n\nfloat_data\x18\x06 \x03(\x02\x12\x16\n\x07\x65ncoded\x18\x07 \x01(\x08:\x05\x66\x61lse\x12\x0e\n\x06labels\x18\x08 \x03(\x05\"r\n\nAnnotation\x12\n\n\x02x1\x18\x01 \x01(\x02\x12\n\n\x02y1\x18\x02 \x01(\x02\x12\n\n\x02x2\x18\x03 \x01(\x02\x12\n\n\x02y2\x18\x04 \x01(\x02\x12\x0c\n\x04name\x18\x05 \x01(\t\x12\x18\n\tdifficult\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x0c\n\x04mask\x18\x07 \x01(\t\"Z\n\x0e\x41nnotatedDatum\x12\x15\n\x05\x64\x61tum\x18\x01 \x01(\x0b\x32\x06.Datum\x12\x10\n\x08\x66ilename\x18\x02 \x01(\t\x12\x1f\n\nannotation\x18\x03 \x03(\x0b\x32\x0b.Annotation')
)
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
_DATUM = _descriptor.Descriptor(
name='Datum',
full_name='Datum',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='channels', full_name='Datum.channels', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='height', full_name='Datum.height', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='width', full_name='Datum.width', index=2,
number=3, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='data', full_name='Datum.data', index=3,
number=4, type=12, cpp_type=9, label=1,
has_default_value=False, default_value=_b(""),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='label', full_name='Datum.label', index=4,
number=5, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='float_data', full_name='Datum.float_data', index=5,
number=6, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='encoded', full_name='Datum.encoded', index=6,
number=7, type=8, cpp_type=7, label=1,
has_default_value=True, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='labels', full_name='Datum.labels', index=7,
number=8, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=15,
serialized_end=160,
)
_ANNOTATION = _descriptor.Descriptor(
name='Annotation',
full_name='Annotation',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='x1', full_name='Annotation.x1', index=0,
number=1, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='y1', full_name='Annotation.y1', index=1,
number=2, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='x2', full_name='Annotation.x2', index=2,
number=3, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='y2', full_name='Annotation.y2', index=3,
number=4, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='name', full_name='Annotation.name', index=4,
number=5, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='difficult', full_name='Annotation.difficult', index=5,
number=6, type=8, cpp_type=7, label=1,
has_default_value=True, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='mask', full_name='Annotation.mask', index=6,
number=7, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=162,
serialized_end=276,
)
_ANNOTATEDDATUM = _descriptor.Descriptor(
name='AnnotatedDatum',
full_name='AnnotatedDatum',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='datum', full_name='AnnotatedDatum.datum', index=0,
number=1, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='filename', full_name='AnnotatedDatum.filename', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='annotation', full_name='AnnotatedDatum.annotation', index=2,
number=3, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=278,
serialized_end=368,
)
_ANNOTATEDDATUM.fields_by_name['datum'].message_type = _DATUM
_ANNOTATEDDATUM.fields_by_name['annotation'].message_type = _ANNOTATION
DESCRIPTOR.message_types_by_name['Datum'] = _DATUM
DESCRIPTOR.message_types_by_name['Annotation'] = _ANNOTATION
DESCRIPTOR.message_types_by_name['AnnotatedDatum'] = _ANNOTATEDDATUM
Datum = _reflection.GeneratedProtocolMessageType('Datum', (_message.Message,), dict(
DESCRIPTOR = _DATUM,
__module__ = 'anno_pb2'
# @@protoc_insertion_point(class_scope:Datum)
))
_sym_db.RegisterMessage(Datum)
Annotation = _reflection.GeneratedProtocolMessageType('Annotation', (_message.Message,), dict(
DESCRIPTOR = _ANNOTATION,
__module__ = 'anno_pb2'
# @@protoc_insertion_point(class_scope:Annotation)
))
_sym_db.RegisterMessage(Annotation)
AnnotatedDatum = _reflection.GeneratedProtocolMessageType('AnnotatedDatum', (_message.Message,), dict(
DESCRIPTOR = _ANNOTATEDDATUM,
__module__ = 'anno_pb2'
# @@protoc_insertion_point(class_scope:AnnotatedDatum)
))
_sym_db.RegisterMessage(AnnotatedDatum)
# @@protoc_insertion_point(module_scope)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import os
import time
import cv2
try:
    import cPickle
except ImportError:
    import pickle as cPickle
from dragon.tools.db import LMDB
from . import anno_pb2 as pb
ZFILL = 8
ENCODE_QUALITY = 95
def set_zfill(value):
global ZFILL
ZFILL = value
def set_quality(value):
global ENCODE_QUALITY
ENCODE_QUALITY = value
def make_datum(image_file, mask_objects, im_scale=None):
filename = os.path.split(image_file)[-1]
anno_datum = pb.AnnotatedDatum()
datum = pb.Datum()
im = cv2.imread(image_file)
    if im_scale:
        im = cv2.resize(im, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
datum.height, datum.width, datum.channels = im.shape
datum.encoded = ENCODE_QUALITY != 100
if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
datum.data = im.tostring()
anno_datum.datum.CopyFrom(datum)
anno_datum.filename = filename.split('.')[0]
for ix, obj in enumerate(mask_objects):
anno = pb.Annotation()
x1, y1, x2, y2 = obj['bbox']
anno.name = obj['name']
anno.x1, anno.y1, anno.x2, anno.y2 = x1, y1, x2, y2
if 'difficult' in obj: anno.difficult = obj['difficult']
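        # Crowd regions are marked as difficult so they can be
        # filtered the same way as difficult objects downstream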
if 'crowd' in obj: anno.difficult = obj['crowd']
anno.mask = obj['mask']
anno_datum.annotation.add().CopyFrom(anno)
return anno_datum
def make_db(database_file, images_path, mask_file,
splits_path, splits, ext='.jpg', im_scale=None):
    if os.path.isdir(database_file):
        raise ValueError('The database path already exists.')
else:
root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir):
os.makedirs(root_dir)
if not isinstance(images_path, list):
images_path = [images_path]
if not isinstance(splits_path, list):
splits_path = [splits_path]
assert len(splits) == len(splits_path)
assert len(splits) == len(images_path)
if mask_file is not None:
with open(mask_file, 'rb') as f:
all_masks = cPickle.load(f)
else:
all_masks = {}
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=10000)
db.open(database_file, mode='w')
count = 0
total_line = 0
start_time = time.time()
zfill_flag = '{0:0%d}' % (ZFILL)
for db_idx, split in enumerate(splits):
split_file = os.path.join(splits_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 10000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + ext)
mask_objects = all_masks[filename] if filename in all_masks else None
if mask_objects is None:
                raise ValueError('No mask record was found for image: {}.'.format(filename))
datum = make_datum(image_file, mask_objects, im_scale)
db.put(zfill_flag.format(count - 1), datum.SerializeToString())
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
db.commit()
db.close()
# Compress the empty space
db.open(database_file, mode='w')
db.commit()
end_time = time.time()
print('{0} images have been stored in the database.'.format(total_line))
print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
print('The size of database is {0} MB.'.format(
float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""A simple process pool to map tasks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
class ProcessPool(object):
def __init__(self, num_processes=8, max_qsize=100):
self.num_tasks = self.fetch_tasks = 0
self.num_processes = num_processes
self.Q = multiprocessing.Queue(max_qsize)
def __enter__(self):
return self
def __exit__(self, *excinfo):
pass
def map(self, tasks, func):
n_tasks_each = int(len(tasks) / self.num_processes)
remain_tasks = len(tasks) - n_tasks_each * self.num_processes
pos = 0
for i in range(self.num_processes):
if i != self.num_processes - 1:
work_set = tasks[pos: pos + n_tasks_each]
pos += n_tasks_each
else:
work_set = tasks[pos: pos + n_tasks_each + remain_tasks]
print('[Main]: Process #{} Got {} tasks.'.format(i, len(work_set)))
p = multiprocessing.Process(target=func, args=(work_set, self.Q))
p.start()
def wait(self):
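        # NOTE: wait() leaves every output in the queue until all tasks
        # finish, so it only works when num_tasks <= max_qsize; for larger
        # task sets, use run() and drain results incrementally with get()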
displays = {}
while True:
qsize = self.Q.qsize()
if qsize == self.num_tasks: break
if qsize > 0 and qsize % 100 == 0:
if qsize not in displays:
displays[qsize] = True
print('[Queue]: Cached {} tasks.'.format(qsize))
outputs = []
while self.Q.qsize() > 0:
outputs.append(self.Q.get())
assert len(outputs) == self.num_tasks
print('[Main]: Got {} outputs.'.format(len(outputs)))
return outputs
def get(self):
self.fetch_tasks += 1
if self.fetch_tasks > self.num_tasks:
return None
return self.Q.get()
def run_all(self, tasks, func):
self.num_tasks = len(tasks)
self.map(tasks, func)
self.wait()
def run(self, tasks, func):
self.num_tasks = len(tasks)
self.map(tasks, func)
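# A minimal usage sketch (``my_tasks`` and ``my_func`` are assumptions;
# ``my_func(tasks, Q)`` must put one output per task into Q):
#   with ProcessPool(16) as pool:
#       pool.run(my_tasks, func=my_func)
#       outputs = [pool.get() for _ in range(len(my_tasks))]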
\ No newline at end of file
# --------------------------------------------------------
# Detectron
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/facebookresearch/Detectron/blob/master/lib/core/config.py>
#
# ------------------------------------------------------------
import os.path as osp
import numpy as np
from lib.utils.attrdict import AttrDict as edict
__C = edict()
cfg = __C
###########################################
# #
# Training Options #
# #
###########################################
__C.TRAIN = edict()
# Initialize network with weights from this file
__C.TRAIN.WEIGHTS = ''
# Database to train
__C.TRAIN.DATABASE = ''
# Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)
# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TRAIN.MAX_SIZE = 1000
# Images to use per mini-batch
__C.TRAIN.IMS_PER_BATCH = 1
# Minibatch size (number of regions of interest [ROIs])
__C.TRAIN.BATCH_SIZE = 128
# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25
# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5
# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0
# Use shuffle after each epoch
__C.TRAIN.USE_SHUFFLE = True
# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True
# Use the difficult (e.g., occluded) objects
__C.TRAIN.USE_DIFF = True
# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5
# If True, randomly scale the image within SCALE_RANGE
__C.TRAIN.SCALE_JITTERING = False
__C.TRAIN.SCALE_RANGE = [0.75, 1.0]
# If True, randomly distort the image by brightness, contrast, and saturation
__C.TRAIN.COLOR_JITTERING = False
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor satisfies both the positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TRAIN.RPN_MIN_SIZE = 0
# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
__C.TRAIN.RPN_STRADDLE_THRESH = 0
# Resume from the last checkpoint?
__C.TRAIN.RESUME = False
###########################################
# #
# Testing Options #
# #
###########################################
__C.TEST = edict()
# Database to test
__C.TEST.DATABASE = ''
# Original json ground-truth file to use
# If not set, records in the database file are used instead
__C.TEST.JSON_FILE = ''
# Scales to use during testing (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,)
# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TEST.MAX_SIZE = 1000
# Images to use per mini-batch
__C.TEST.IMS_PER_BATCH = 1
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3
# Use Soft-NMS instead of standard NMS?
# For the soft NMS overlap threshold, we simply use TEST.NMS
__C.TEST.USE_SOFT_NMS = False
__C.TEST.SOFT_NMS_METHOD = 'linear'
__C.TEST.SOFT_NMS_SIGMA = 0.5
# The top-k prior boxes before nms.
__C.TEST.NMS_TOP_K = 400
# The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05
# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 0
# Save detection results files if True
# If False, results files are cleaned up (they can be large) after local
# evaluation
__C.TEST.COMPETITION_MODE = True
# The optional test protocol for a custom dataset
# Ignored by the VOC and COCO datasets
# Available protocols: 'voc2007', 'voc2010', 'coco'
__C.TEST.PROTOCOL = 'voc2007'
# Maximum number of detections to return per image (100 is based on the limit
# established for the COCO dataset)
__C.TEST.DETECTIONS_PER_IM = 100
###########################################
# #
# Model Options #
# #
###########################################
__C.MODEL = edict()
# The type of the model
# ('faster_rcnn',
#  'mask_rcnn',
#  'ssd',
#  'rssd',
#  'retinanet',
# )
__C.MODEL.TYPE = ''
# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE = 'FLOAT32'
# The backbone
__C.MODEL.BACKBONE = ''
# The number of classes in the dataset
__C.MODEL.NUM_CLASSES = -1
# Keep it for TaaS DataSet
__C.MODEL.CLASSES = ['__background__']
# Add StopGrad at a specified stage so the bottom layers are frozen
__C.MODEL.FREEZE_AT = 2
# Whether to use the bias prior to improve the one-stage detector?
# Enabled if the model type is in ('ssd',)
# RetinaNet is forced to use the bias prior
__C.MODEL.USE_BIAS_PRIOR = False
# Whether to use focal loss for the one-stage detectors?
# Enabled if the model type is in ('ssd',)
# RetinaNet is forced to use focal loss
__C.MODEL.USE_FOCAL_LOSS = False
__C.MODEL.FOCAL_LOSS_ALPHA = 0.25
__C.MODEL.FOCAL_LOSS_GAMMA = 2.0
# Stride of the coarsest Feature level
# This is needed so the input can be padded properly
__C.MODEL.COARSEST_STRIDE = -1
###########################################
# #
# RPN Options #
# #
###########################################
__C.RPN = edict()
# Strides for multiple rpn heads
__C.RPN.STRIDES = [4, 8, 16, 32, 64]
# Scales for multiple anchors
__C.RPN.SCALES = [8, 8, 8, 8, 8]
# RPN anchor aspect ratios
__C.RPN.ASPECT_RATIOS = [0.5, 1, 2]
###########################################
# #
# Retina-Net Options #
# #
###########################################
__C.RETINANET = edict()
# Anchor aspect ratios to use
__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)
# Anchor scales per octave
__C.RETINANET.SCALES_PER_OCTAVE = 3
# At each FPN level, we generate anchors based on their scale, aspect_ratio,
# stride of the level, and we multiply the resulting anchor by ANCHOR_SCALE
__C.RETINANET.ANCHOR_SCALE = 4
# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
__C.RETINANET.NUM_CONVS = 4
# During inference, #locs to select based on cls score before NMS is performed
__C.RETINANET.PRE_NMS_TOP_N = 1000
# IoU overlap ratio for labeling an anchor as positive
# Anchors with >= iou overlap are labeled positive
__C.RETINANET.POSITIVE_OVERLAP = 0.5
# IoU overlap ratio for labeling an anchor as negative
# Anchors with < iou overlap are labeled negative
__C.RETINANET.NEGATIVE_OVERLAP = 0.4
# Whether softmax should be used in classification branch training
__C.RETINANET.SOFTMAX = False
###########################################
# #
# FPN Options #
# #
###########################################
__C.FPN = edict()
# Coarsest level of the FPN pyramid
__C.FPN.RPN_MAX_LEVEL = 6
# Finest level of the FPN pyramid
__C.FPN.RPN_MIN_LEVEL = 2
# Hyper-Parameters for the RoI-to-FPN level mapping heuristic
__C.FPN.ROI_CANONICAL_SCALE = 224
__C.FPN.ROI_CANONICAL_LEVEL = 4
# Coarsest level of the FPN pyramid
__C.FPN.ROI_MAX_LEVEL = 5
# Finest level of the FPN pyramid
__C.FPN.ROI_MIN_LEVEL = 2
###########################################
# #
# Fast R-CNN Options #
# #
###########################################
__C.FRCNN = edict()
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.FRCNN.ROI_XFORM_METHOD = 'RoIPool'
# Hidden layer dimension when using an MLP for the RoI box head
__C.FRCNN.MLP_HEAD_DIM = 1024
# RoI transform output resolution
# Note: some models may have constraints on what they can use, e.g. they use
# pretrained FC layers like in VGG16, and will ignore this option
__C.FRCNN.ROI_XFORM_RESOLUTION = 7
###########################################
# #
# Mask R-CNN Options #
# #
###########################################
__C.MRCNN = edict()
# Resolution of mask predictions
__C.MRCNN.RESOLUTION = 28
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign'
# RoI transform output resolution
__C.MRCNN.ROI_XFORM_RESOLUTION = 14
###########################################
# #
# SSD Options #
# #
###########################################
__C.SSD = edict()
# Whether to enable FPN enhancement?
__C.SSD.FPN_ON = False
__C.SSD.MULTIBOX = edict()
# MultiBox configs
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
__C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []
__C.SSD.OHEM = edict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
# The ratio used in hard example mining
__C.SSD.OHEM.NEG_POS_RATIO = 3.0
# Distort the image?
__C.SSD.DISTORT = edict()
__C.SSD.DISTORT.BRIGHTNESS_PROB = 0.5
__C.SSD.DISTORT.CONTRAST_PROB = 0.5
__C.SSD.DISTORT.SATURATION_PROB = 0.5
# Expand the image?
__C.SSD.EXPAND = edict()
__C.SSD.EXPAND.PROB = 0.5
__C.SSD.EXPAND.MAX_RATIO = 4.0
# Resize the image?
__C.SSD.RESIZE = edict()
__C.SSD.RESIZE.HEIGHT = 300
__C.SSD.RESIZE.WIDTH = 300
__C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
# Samplers
# Format as (min_scale, max_scale,
# min_aspect_ratio, max_aspect_ratio,
# min_jaccard_overlap, max_jaccard_overlap,
# max_trials, max_sample)
__C.SSD.SAMPLERS = [
(1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1, 1), # Entire image
(0.3, 1.0, 0.5, 2.0, 0.1, 1.0, 10, 1), # IoU >= 0.1
(0.3, 1.0, 0.5, 2.0, 0.3, 1.0, 10, 1), # IoU >= 0.3
(0.3, 1.0, 0.5, 2.0, 0.5, 1.0, 5, 1), # IoU >= 0.5
(0.3, 1.0, 0.5, 2.0, 0.7, 1.0, 5, 1), # IoU >= 0.7
(0.3, 1.0, 0.5, 2.0, 0.9, 1.0, 5, 1), # IoU >= 0.9
(0.3, 1.0, 0.5, 2.0, 0.0, 1.0, 1, 1), # Any patches
]
###########################################
# #
# ResNet Options #
# #
###########################################
__C.RESNET = edict()
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
__C.RESNET.NUM_GROUPS = 1
# Baseline width of each group
__C.RESNET.GROUP_WIDTH = 64
###########################################
# #
# DropBlock Options #
# #
###########################################
__C.DROPBLOCK = edict()
# Whether to use DropBlock for more regularization
__C.DROPBLOCK.DROP_ON = False
# Decrement for scheduling keep prob after each iteration
__C.DROPBLOCK.DECREMENT = 1e-6
###########################################
# #
# Solver Options #
# #
###########################################
__C.SOLVER = edict()
# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001
# Optional scaling factor for total loss
# This option is helpful to scale the magnitude
# of gradients during FP16 training
__C.SOLVER.LOSS_SCALING = 1.
# Schedule type (see functions in utils.lr_policy for options)
# E.g., 'step', 'steps_with_decay', ...
__C.SOLVER.LR_POLICY = 'steps_with_decay'
# Hyperparameter used by the specified policy
# For 'step', the current LR is multiplied by SOLVER.GAMMA at each step
__C.SOLVER.GAMMA = 0.1
# Uniform step size for 'steps' policy
__C.SOLVER.STEP_SIZE = 30000
__C.SOLVER.STEPS = []
# Maximum number of SGD iterations
__C.SOLVER.MAX_ITERS = 40000
# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9
# L2 regularization hyperparameter
__C.SOLVER.WEIGHT_DECAY = 0.0005
# L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = -1.0
# Warm up to SOLVER.BASE_LR over this number of SGD iterations
__C.SOLVER.WARM_UP_ITERS = 500
# Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR
__C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0
# The steps for accumulating gradients
__C.SOLVER.ITER_SIZE = 1
# The interval to display logs
__C.SOLVER.DISPLAY = 20
# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_ITERS = 5000
# prefix to yield the path: <prefix>_iters_XYZ.caffemodel
__C.SOLVER.SNAPSHOT_PREFIX = ''
###########################################
# #
# Misc Options #
# #
###########################################
# Number of GPUs to use (applies to both training and testing)
__C.NUM_GPUS = 1
# Use NCCL for all-reduce, otherwise use CUDA-aware MPI
__C.USE_NCCL = True
# Hosts for Inter-Machine communication
__C.HOSTS = []
# Pixel mean values (BGR order) as a (1, 1, 3) array
# We use the same pixel mean for all networks even though it's not exactly what
# they were trained with
__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
__C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
# Default weights on (dx, dy, dw, dh, da) for normalizing rbox regression targets
__C.RBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0, 10.0)
# Clip bounding box transformation predictions to prevent np.exp from
# overflowing
# Heuristic choice: this value scales a 16-pixel anchor up to at most 1000 pixels
__C.BBOX_XFORM_CLIP = np.log(1000. / 16.)
# Whether to apply BBOX_XFORM_CLIP when decoding box predictions
__C.USE_XFORM_CLIP = False
# Prior prob for the positives at the beginning of training.
# This is used to set the bias init for the logits layer
__C.PRIOR_PROB = 0.01
# For reproducibility
__C.RNG_SEED = 3
# Root directory of project
__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
# Data directory
__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))
# Place outputs under an experiments directory
__C.EXP_DIR = ''
# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True
# Default GPU device id
__C.GPU_ID = 0
# Dump detection visualizations
__C.VIS = False
__C.VIS_ON_FILE = False
# Score threshold for visualization
__C.VIS_TH = 0.7
# Write summaries by tensor board
__C.ENABLE_TENSOR_BOARD = False
def _merge_a_into_b(a, b):
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
if not isinstance(a, dict): return
for k, v in a.items():
# a must specify keys that are in b
        if k not in b:
raise KeyError('{} is not a valid config key'.format(k))
# the types must match, too
v = _check_and_coerce_cfg_value_type(v, b[k], k)
# recursively merge dicts
if type(v) is edict:
try:
_merge_a_into_b(a[k], b[k])
except:
print('Error under config key: {}'.format(k))
raise
else:
b[k] = v
def cfg_from_file(filename):
"""Load a config file and merge it into the default options."""
import yaml
with open(filename, 'r') as f:
yaml_cfg = edict(yaml.load(f))
global __C
_merge_a_into_b(yaml_cfg, __C)
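# A minimal YAML sketch accepted by cfg_from_file() (the values are
# illustrative assumptions; every key must already exist in the defaults):
#   MODEL:
#     TYPE: faster_rcnn
#     NUM_CLASSES: 21
#   TRAIN:
#     WEIGHTS: /models/R-50.Affine.pth
#     DATABASE: /data/voc_0712_trainval_lmdb
#   SOLVER:
#     BASE_LR: 0.002
#     MAX_ITERS: 60000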
def cfg_from_list(cfg_list):
"""Set config keys via list (e.g., from command line)."""
from ast import literal_eval
assert len(cfg_list) % 2 == 0
for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
key_list = k.split('.')
d = __C
for subkey in key_list[:-1]:
            assert subkey in d
d = d[subkey]
subkey = key_list[-1]
        assert subkey in d
try:
value = literal_eval(v)
        except (ValueError, SyntaxError):
# handle the case when v is a string literal
value = v
assert type(value) == type(d[subkey]), \
'type {} does not match original type {}'.format(
type(value), type(d[subkey]))
d[subkey] = value
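# A minimal usage sketch (the key/value pairs are illustrative assumptions):
#   cfg_from_list(['TRAIN.SCALES', '(800,)', 'SOLVER.BASE_LR', '0.002'])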
def _check_and_coerce_cfg_value_type(value_a, value_b, key):
"""Checks that `value_a`, which is intended to replace `value_b` is of the
right type. The type is correct if it matches exactly or is one of a few
cases in which the type can be easily coerced.
"""
# The types must match (with some exceptions)
type_b = type(value_b)
type_a = type(value_a)
if type_a is type_b: return value_a
if type_b is float and type_a is int: return float(value_a)
# Exceptions: numpy arrays, strings, tuple<->list
if isinstance(value_b, np.ndarray):
value_a = np.array(value_a, dtype=value_b.dtype)
elif isinstance(value_a, tuple) and isinstance(value_b, list):
value_a = list(value_a)
elif isinstance(value_a, list) and isinstance(value_b, tuple):
value_a = tuple(value_a)
elif isinstance(value_a, dict) and isinstance(value_b, edict):
value_a = edict(value_a)
else:
raise ValueError(
'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
'key: {}'.format(type_b, type_a, value_b, value_a, key)
)
return value_a
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
import time
import numpy as np
from lib.core.config import cfg, cfg_from_file
class Coordinator(object):
"""Coordinator is a simple tool to manage the
unique experiments from the YAML configurations.
"""
def __init__(self, cfg_file, exp_dir=None):
# Override the default configs
cfg_from_file(cfg_file)
if cfg.EXP_DIR != '':
exp_dir = cfg.EXP_DIR
if exp_dir is None:
model_id = time.strftime(
'%Y%m%d_%H%M%S', time.localtime(time.time()))
self.experiment_dir = '../experiments/{}'.format(model_id)
if not os.path.exists(self.experiment_dir):
os.makedirs(self.experiment_dir)
else:
if not os.path.exists(exp_dir):
raise ValueError('ExperimentDir({}) does not exist.'.format(exp_dir))
self.experiment_dir = exp_dir
def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path): os.makedirs(path)
return path
def checkpoints_dir(self):
return self._path_at('checkpoints')
def exports_dir(self):
return self._path_at('exports')
def results_dir(self, checkpoint=None):
sub_dir = os.path.splitext(os.path.basename(checkpoint))[0] if checkpoint else ''
return self._path_at(os.path.join('results', sub_dir))
def checkpoint(self, global_step=None, wait=True):
def locate():
files = os.listdir(self.checkpoints_dir())
steps = []
for ix, file in enumerate(files):
step = int(file.split('_iter_')[-1].split('.')[0])
if global_step == step: return os.path.join(self.checkpoints_dir(), files[ix])
steps.append(step)
if global_step is None:
if len(files) == 0:
raise ValueError('Dir({}) is empty.'.format(self.checkpoints_dir()))
                last_idx = int(np.argmax(steps))
return os.path.join(self.checkpoints_dir(), files[last_idx])
return None
result = locate()
while not result and wait:
print('\rWaiting for step_{}.checkpoint to exist...'.format(global_step), end='')
time.sleep(10)
result = locate()
return result
def delete_experiment(self):
if os.path.exists(self.experiment_dir):
shutil.rmtree(self.experiment_dir)
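# A minimal usage sketch (the YAML path is an assumption):
#   coordinator = Coordinator('configs/faster_rcnn.yml')
#   checkpoints_dir = coordinator.checkpoints_dir()
#   latest_checkpoint = coordinator.checkpoint(wait=False)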
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.detector import Detector
from lib.utils import logger
class Solver(object):
def __init__(self):
# Define the generic detector
self.detector = Detector().cuda(cfg.GPU_ID)
# Define the optimizer and its arguments
self.optimizer = None
self.opt_arguments = {
'scale_gradient': 1. / (
cfg.SOLVER.LOSS_SCALING *
cfg.SOLVER.ITER_SIZE),
'clip_gradient': float(cfg.SOLVER.CLIP_NORM),
'weight_decay': cfg.SOLVER.WEIGHT_DECAY,
}
# Define the global step
self.iter = 0
# Define the decay step
self._current_step = 0
def _get_param_groups(self):
param_groups = [
{
'params': [],
'lr_mult': 1.,
'decay_mult': 1.,
},
# Special treatment for biases (mainly to match historical impl.
# details):
# (1) Do not apply weight decay
# (2) Use a 2x higher learning rate
{
'params': [],
'lr_mult': 2.,
'decay_mult': 0.,
}
]
for name, param in self.detector.named_parameters():
if 'bias' in name: param_groups[1]['params'].append(param)
else: param_groups[0]['params'].append(param)
return param_groups
def set_learning_rate(self):
policy = cfg.SOLVER.LR_POLICY
if policy == 'steps_with_decay':
if self._current_step < len(cfg.SOLVER.STEPS) \
and self.iter >= cfg.SOLVER.STEPS[self._current_step]:
self._current_step = self._current_step + 1
logger.info('MultiStep Status: Iteration {}, step = {}' \
.format(self.iter, self._current_step))
new_lr = cfg.SOLVER.BASE_LR * (
cfg.SOLVER.GAMMA ** self._current_step)
self.optimizer.param_groups[0]['lr'] = \
self.optimizer.param_groups[1]['lr'] = new_lr
else:
raise ValueError('Unknown lr policy: ' + policy)
def one_step(self):
# Forward & Backward & Compute_loss
iter_size = cfg.SOLVER.ITER_SIZE
loss_scaling = cfg.SOLVER.LOSS_SCALING
run_time = 0.; stats = {'loss': {'total': 0.}, 'iter': self.iter}
add_loss = lambda x, y: y if x is None else x + y
tic = time.time()
if iter_size > 1:
# Dragon is designed for manual gradients accumulating
# ``zero_grad`` is only required if calling ``accumulate_grad``
self.optimizer.zero_grad()
for i in range(iter_size):
outputs, total_loss = self.detector(), None
# Sum the partial losses
for k, v in outputs.items():
if 'loss' in k:
if k not in stats['loss']:
stats['loss'][k] = 0.
total_loss = add_loss(total_loss, v)
stats['loss'][k] += float(v) * loss_scaling
if loss_scaling != 1.: total_loss *= loss_scaling
stats['loss']['total'] += float(total_loss)
total_loss.backward()
if iter_size > 1: self.optimizer.accumulate_grad()
run_time += (time.time() - tic)
# Apply Update
self.set_learning_rate()
tic = time.time()
self.optimizer.step()
run_time += (time.time() - tic)
self.iter += 1
# Average loss by the iter size
for k in stats['loss'].keys():
stats['loss'][k] /= cfg.SOLVER.ITER_SIZE
# Misc stats
stats['lr'] = self.base_lr
stats['time'] = run_time
return stats
@property
def base_lr(self):
return self.optimizer.param_groups[0]['lr']
@base_lr.setter
def base_lr(self, value):
self.optimizer.param_groups[0]['lr'] = \
self.optimizer.param_groups[1]['lr'] = value
class SGDSolver(Solver):
def __init__(self):
super(SGDSolver, self).__init__()
self.opt_arguments.update(**{
'lr': cfg.SOLVER.BASE_LR,
'momentum': cfg.SOLVER.MOMENTUM,
})
self.optimizer = torch.optim.SGD(
self._get_param_groups(), **self.opt_arguments)
class NesterovSolver(Solver):
def __init__(self):
super(NesterovSolver, self).__init__()
self.opt_arguments.update(**{
'lr': cfg.SOLVER.BASE_LR,
'momentum': cfg.SOLVER.MOMENTUM,
'nesterov': True,
})
self.optimizer = torch.optim.SGD(
self._get_param_groups(), **self.opt_arguments)
class RMSPropSolver(Solver):
def __init__(self):
super(RMSPropSolver, self).__init__()
self.opt_arguments.update(**{
'lr': cfg.SOLVER.BASE_LR,
'alpha': 0.9,
'eps': 1e-5,
})
self.optimizer = torch.optim.RMSprop(
self._get_param_groups(), **self.opt_arguments)
class AdamSolver(Solver):
def __init__(self):
super(AdamSolver, self).__init__()
self.opt_arguments.update(**{
'lr': cfg.SOLVER.BASE_LR,
'beta1': 0.9,
'beta2': 0.999,
'eps': 1e-5,
})
        self.optimizer = torch.optim.Adam(
self._get_param_groups(), **self.opt_arguments)
def get_solver_func(type):
if type == 'MomentumSGD':
return SGDSolver
elif type == 'Nesterov':
return NesterovSolver
elif type == 'RMSProp':
return RMSPropSolver
elif type == 'Adam':
return AdamSolver
else:
        raise ValueError('Unsupported solver type: {}.\n'
            'Expected one of: MomentumSGD, Nesterov, RMSProp, Adam.'.format(type))
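# A minimal usage sketch (illustrative only, not part of the original file),
# assuming the global config has been populated and a dataset is available:
#
#     from lib.core.solver import get_solver_func
#     solver = get_solver_func('MomentumSGD')()
#     while solver.iter < 1000:
#         stats = solver.one_step()
#         print(stats['iter'], stats['lr'], stats['loss']['total'])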
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import cv2
from multiprocessing import Queue
from collections import OrderedDict
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
# All detectors share the same reader/transformer during testing
from lib.faster_rcnn.data.data_reader import DataReader
from lib.faster_rcnn.data.data_transformer import DataTransformer
class TestServer(object):
def __init__(self, output_dir):
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.num_images, self.num_classes, self.classes = \
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = DataReader(**{'source': self.imdb.source})
self.data_transformer = DataTransformer()
self.data_reader.Q_out = Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.start()
self.gt_recs = OrderedDict()
self.output_dir = output_dir
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
serialized = self.data_reader.Q_out.get()
image = self.data_transformer.get_image(serialized)
image_id, objects = self.data_transformer.get_annotations(serialized)
self.gt_recs[image_id] = {
'objects': objects,
'width': image.shape[1],
'height': image.shape[0]}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
            raise RuntimeError('Loaded {} records, '
                'while the database expects {}.'.format(
                    len(self.gt_recs), self.num_images))
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes, self.get_records(), self.output_dir)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes, all_masks, self.get_records(), self.output_dir)
class InferServer(object):
def __init__(self, output_dir):
self.images_dir = cfg.TEST.DATABASE
self.imdb = get_imdb('taas:/empty')
self.images = os.listdir(self.images_dir)
self.num_images, self.num_classes, self.classes = \
len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES
self.data_transformer = DataTransformer()
self.gt_recs = OrderedDict()
self.output_dir = output_dir
self.image_idx = 0
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
image_name = self.images[self.image_idx]
image_id = image_name.split('.')[0]
image = cv2.imread(os.path.join(self.images_dir, image_name))
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = {
'width': image.shape[1],
'height': image.shape[0]}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError('Loading {} records, '
'while the specific database required {}'.format(
len(self.gt_recs), self.num_images))
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes, self.get_records(), self.output_dir)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes, all_masks, self.get_records(), self.output_dir)
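# A minimal usage sketch (illustrative only; ``run_detector`` is a
# hypothetical callable standing in for an actual detection pipeline):
#
#     server = TestServer(output_dir='/tmp/results')
#     all_boxes = [[[] for _ in range(server.num_images)]
#                  for _ in range(server.num_classes)]
#     for i in range(server.num_images):
#         image_id, image = server.get_image()
#         # fill all_boxes[cls][i] with (N, 5) arrays of [x1, y1, x2, y2, score]
#         # ... all_boxes[cls][i] = run_detector(image) ...
#     server.evaluate_detections(all_boxes)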
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/train.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import datetime
from collections import OrderedDict
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.core.solver import get_solver_func
from lib.utils.timer import Timer
from lib.utils.stats import SmoothedValue
from lib.utils import logger
class SolverWrapper(object):
def __init__(self, coordinator):
self.output_dir = coordinator.checkpoints_dir()
self.solver = get_solver_func('MomentumSGD')()
# Load the pre-trained weights
init_weights = cfg.TRAIN.WEIGHTS
if init_weights != '':
if os.path.exists(init_weights):
logger.info('Loading weights from {}.'.format(init_weights))
self.solver.detector.load_weights(init_weights)
else:
raise ValueError('Invalid path of weights: {}'.format(init_weights))
        # Mixed precision training?
        if cfg.MODEL.DATA_TYPE.lower() == 'float16':
            self.solver.detector.half()  # Cast the detector parameters to float16
# Plan the metrics
self.metrics = OrderedDict()
if cfg.ENABLE_TENSOR_BOARD:
from dragon.tools.tensorboard import TensorBoard
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs')
def snapshot(self):
if not logger.is_root(): return None
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth')
filename = os.path.join(self.output_dir, filename)
torch.save(self.solver.detector.state_dict(), filename)
logger.info('Wrote snapshot to: {:s}'.format(filename))
return filename
def add_metrics(self, stats):
for k, v in stats['loss'].items():
if k not in self.metrics:
self.metrics[k] = SmoothedValue(20)
self.metrics[k].AddValue(v)
def send_metrics(self, stats):
if hasattr(self, 'board'):
self.board.scalar_summary('lr', stats['lr'], stats['iter'])
self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items():
if k == 'total':
self.board.scalar_summary('total_loss', v.GetMedianValue(), stats['iter'])
else: self.board.scalar_summary(k, v.GetMedianValue(), stats['iter'])
def step(self, display=False):
stats = self.solver.one_step()
self.add_metrics(stats)
self.send_metrics(stats)
if display:
logger.info('Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (stats['iter'],
stats['lr'], self.metrics['total'].GetMedianValue(), stats['time']))
for k, v in self.metrics.items():
if k == 'total': continue
logger.info(' Train net output({}): {}'.format(k, v.GetMedianValue()))
def train_model(self):
"""Network training loop."""
last_snapshot_iter = -1
timer = Timer()
model_paths = []
start_lr = self.solver.base_lr
while self.solver.iter < cfg.SOLVER.MAX_ITERS:
if self.solver.iter < cfg.SOLVER.WARM_UP_ITERS:
alpha = (self.solver.iter + 1.0) / cfg.SOLVER.WARM_UP_ITERS
self.solver.base_lr = \
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
# Apply 1-step SGD update
timer.tic()
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0)
timer.toc()
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time
eta_seconds = average_time * (
cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
logger.info('< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta))
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0:
last_snapshot_iter = self.solver.iter
model_paths.append(self.snapshot())
if last_snapshot_iter != self.solver.iter:
model_paths.append(self.snapshot())
return model_paths
def train_net(coordinator, start_iter=0):
sw = SolverWrapper(coordinator)
sw.solver.iter = start_iter
logger.info('Solving...')
model_paths = sw.train_model()
return model_paths
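# Worked example of the linear warm-up above (illustrative numbers): with
# BASE_LR = 0.01, WARM_UP_ITERS = 500 and WARM_UP_FACTOR = 1 / 3, iteration 0
# uses lr = 0.01 * (1/3 * (1 - 1/500) + 1/500) ~= 0.0033, and the learning
# rate grows linearly until it reaches the full 0.01 at iteration 499.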
\ No newline at end of file
File mode changed
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/coco.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import os
import sys
import os.path as osp
import numpy as np
import uuid
import json
import cv2
try:
import cPickle
except ImportError:
import pickle as cPickle
from lib.datasets.imdb import imdb
# COCO API
from lib.pycocotools.coco import COCO
from lib.pycocotools.cocoeval import COCOeval
from lib.pycocotools.mask import encode as encode_masks
from lib.core.config import cfg
from lib.utils import boxes as box_utils
class coco(imdb):
def __init__(self, image_set, year):
imdb.__init__(self, 'coco_' + year + '_' + image_set)
self._year = year
self._image_set = image_set
self._data_path = cfg.DATA_DIR
self._classes = ('__background__', # always index 0
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench',
'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush')
self._COCO = COCO(self._get_ann_file())
self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
cats = self._COCO.loadCats(self._COCO.getCatIds())
self._class_to_coco_cat_id = dict(zip([c['name'] for c in cats],
self._COCO.getCatIds()))
self._salt = str(uuid.uuid4())
self.config = {'cleanup': False, 'use_salt': True}
##############################################
# #
# UTILS #
# #
##############################################
def _get_ann_file(self):
prefix = 'instances' if self._image_set.find('test') == -1 else 'image_info'
image_set = 'minival' if self._image_set == 'trainval35k' else self._image_set
return osp.join(self._data_path, 'annotations',
prefix + '_' + image_set + self._year + '.json')
def _get_comp_id(self):
return '_' + self._salt if self.config['use_salt'] else ''
def _get_prefix(self, type='bbox'):
if type == 'bbox': return 'detections_'
elif type == 'segm': return 'segmentations_'
elif type == 'kpt': return 'keypoints_'
return ''
def _get_coco_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_minival2014_<comp_id>.json
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json'
if not os.path.exists(results_folder): os.makedirs(results_folder)
return os.path.join(results_folder, filename)
##############################################
# #
# COCO-BBOX #
# #
##############################################
def _bbox_results_one_category(self, boxes, cat_id, gt_recs):
results = []
ix = 0
for image_name, rec in gt_recs.items():
dets = boxes[ix]; ix += 1
if isinstance(dets, list) and len(dets) == 0: continue
dets = dets.astype(np.float)
scores = dets[:, -1]
xs = dets[:, 0]
ys = dets[:, 1]
ws = dets[:, 2] - xs + 1
hs = dets[:, 3] - ys + 1
results.extend(
[{'image_id': int(image_name.split('_')[-1].split('.')[0]),
'category_id': cat_id,
'bbox': [xs[k], ys[k], ws[k], hs[k]],
'score': scores[k]} for k in range(dets.shape[0])])
return results
def _do_detection_eval(self, res_file, output_dir):
coco_dt = self._COCO.loadRes(res_file)
coco_eval = COCOeval(self._COCO, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_detection_eval_metrics(coco_eval)
eval_file = osp.join(output_dir, 'detection_results.pkl')
with open(eval_file, 'wb') as fid:
cPickle.dump(coco_eval, fid, cPickle.HIGHEST_PROTOCOL)
print('Wrote COCO eval results to: {}'.format(eval_file))
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
res_file = self._write_coco_results(all_boxes, None, gt_recs, output_dir)
# Only do evaluation on non-test sets
if self._image_set.find('test') == -1:
self._do_detection_eval(res_file, output_dir)
# Optionally cleanup results json file
if self.config['cleanup']: os.remove(res_file)
##############################################
# #
# COCO-SEGM #
# #
##############################################
def _encode_masks(self, masks, boxes, im_h, im_w, binary_thresh=0.4):
num_pred = len(boxes)
assert len(masks) == num_pred
mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
# To work around an issue with cv2.resize (it seems to automatically pad
# with repeated border values), we manually zero-pad the masks by 1 pixel
# prior to resizing back to the original image resolution. This prevents
# "top hat" artifacts. We therefore need to expand the reference boxes by an
# appropriate factor.
M = masks[0].shape[0]
scale = (M + 2.0) / M
ref_boxes = box_utils.expand_boxes(boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
for i in range(num_pred):
ref_box = ref_boxes[i, :4]
mask = masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > binary_thresh, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
mask_image[y1 : y2, x1 : x2, i] = mask[(y1 - ref_box[1]) : (y2 - ref_box[1]),
(x1 - ref_box[0]) : (x2 - ref_box[0])]
return encode_masks(mask_image)
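    # For example, with a mask resolution of M = 28 the reference boxes above
    # are expanded by (28 + 2) / 28 ~= 1.071, i.e. one extra pixel per side at
    # mask scale, matching the 1-pixel zero padding applied before resizing.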
def _segm_results_one_category(self, boxes, masks, cat_id, gt_recs):
def filter_boxes(dets):
boxes = dets[:, :4]
ws = boxes[:, 2] - boxes[:, 0]
hs = boxes[:, 3] - boxes[:, 1]
keep = np.where((ws >= 1) & (hs >= 1))[0]
return keep
results = []
ix = 0
for image_name, rec in gt_recs.items():
image_id = int(image_name.split('_')[-1].split('.')[0])
dets = boxes[ix].astype(np.float)
msks = masks[ix]; ix += 1
            keep = filter_boxes(dets)
            im_h, im_w = rec['height'], rec['width']
            if len(keep) == 0: continue
            scores = dets[keep, -1]
            mask_encode = self._encode_masks(
                msks[keep], dets[keep, :4], im_h, im_w, cfg.TEST.BINARY_THRESH)
for k in range(dets[keep].shape[0]):
rle = mask_encode[k]
if sys.version_info >= (3,0): rle['counts'] = rle['counts'].decode()
results.append({
'image_id': image_id,
'category_id': cat_id,
'segmentation': rle,
'score': scores[k]})
return results
def _do_segmentation_eval(self, res_file, output_dir):
coco_dt = self._COCO.loadRes(res_file)
coco_eval = COCOeval(self._COCO, coco_dt, 'segm')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_detection_eval_metrics(coco_eval)
eval_file = osp.join(output_dir, 'segmentation_results.pkl')
with open(eval_file, 'wb') as fid:
cPickle.dump(coco_eval, fid, cPickle.HIGHEST_PROTOCOL)
print('Wrote COCO eval results to: {}'.format(eval_file))
def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
res_file = self._write_coco_results(all_boxes, all_masks, gt_recs, output_dir)
# Only do evaluation on non-test sets
if self._image_set.find('test') == -1:
self._do_segmentation_eval(res_file, output_dir)
# Optionally cleanup results json file
if self.config['cleanup']: os.remove(res_file)
##############################################
# #
# EVAL-API #
# #
##############################################
def _write_coco_results(self, all_boxes, all_masks, gt_recs, output_dir):
# <bbox>
# [{"image_id": 42,
# "category_id": 18,
# "bbox": [258.15,41.29,348.26,243.78],
# "score": 0.236}, ...]
# <segmentation>
# [{"image_id": 42,
# "category_id": 18,
# "segmentation": [.....],
# "score": 0.236}, ...]
results = []
        eval_type = 'bbox'
        if all_masks is not None: eval_type = 'segm'
res_file = self._get_coco_results_T(output_dir, eval_type)
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'.format(cls, cls_ind, self.num_classes - 1))
coco_cat_id = self._class_to_coco_cat_id[cls]
if all_masks is None:
results.extend(self._bbox_results_one_category(
all_boxes[cls_ind], coco_cat_id, gt_recs))
else:
results.extend(self._segm_results_one_category(
all_boxes[cls_ind], all_masks[cls_ind], coco_cat_id, gt_recs))
print('Writing results json to {}'.format(res_file))
with open(res_file, 'w') as fid: json.dump(results, fid)
return res_file
def _print_detection_eval_metrics(self, coco_eval):
IoU_lo_thresh = 0.5
IoU_hi_thresh = 0.95
def _get_thr_ind(coco_eval, thr):
ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
(coco_eval.params.iouThrs < thr + 1e-5))[0][0]
iou_thr = coco_eval.params.iouThrs[ind]
assert np.isclose(iou_thr, thr)
return ind
ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
# precision has dims (iou, recall, cls, area range, max dets)
# area range index 0: all area ranges
# max dets index 2: 100 per image
precision = \
coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
ap_default = np.mean(precision[precision > -1])
        print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
              '~~~~'.format(IoU_lo_thresh, IoU_hi_thresh))
print('{:.1f}'.format(100 * ap_default))
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
# minus 1 because of __background__
precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
ap = np.mean(precision[precision > -1])
print('{:.1f}'.format(100 * ap))
print('~~~~ Summary metrics ~~~~')
coco_eval.summarize()
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/factory.py>
#
# ------------------------------------------------------------
from lib.datasets.pascal_voc import pascal_voc
from lib.datasets.coco import coco
from lib.datasets.taas import taas
__sets = {}
# pascal voc
for year in ['2007', '2012', '0712']:
for split in ['train', 'val', 'trainval', 'test']:
name = 'voc_{}_{}'.format(year, split)
__sets[name] = (lambda split=split, year=year: pascal_voc(split, year))
# coco 2014
for year in ['2014']:
for split in ['train', 'val', 'trainval35k', 'minival', 'valminusminival']:
name = 'coco_{}_{}'.format(year, split)
__sets[name] = (lambda split=split, year=year: coco(split, year))
# coco 2015 & 2017
for year in ['2015', '2017']:
for split in ['test', 'test-dev']:
name = 'coco_{}_{}'.format(year, split)
__sets[name] = (lambda split=split, year=year: coco(split, year))
# taas
__sets['taas'] = (lambda source: taas(source))
def get_imdb(name):
"""Get an imdb (image database) by name."""
keys = name.split(':')
if len(keys) == 2:
cls, source = keys
if cls not in __sets:
raise KeyError('Unknown dataset: {}'.format(cls))
return __sets[cls](source)
elif len(keys) == 1:
return __sets[name]()
else:
raise ValueError('Illegal format of image database: {}'.format(name))
def list_imdbs():
"""List all registered imdbs."""
return __sets.keys()
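# Usage sketch (illustrative only; the LMDB path below is hypothetical):
#
#     from lib.datasets.factory import get_imdb
#     db = get_imdb('voc_2007_trainval')       # a registered split
#     db = get_imdb('taas:/data/train_lmdb')   # the 'cls:source' form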
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/imdb.py>
#
# ------------------------------------------------------------
import os
from dragon.tools.db import LMDB
from lib.core.config import cfg
class imdb(object):
def __init__(self, name):
self._name = name
self._num_classes = 0
self._classes = []
@property
def name(self):
return self._name
@property
def num_classes(self):
return len(self._classes)
@property
def classes(self):
return self._classes
@property
def cache_path(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
@property
def source(self):
        expected_source = os.path.join(self.cache_path, self.name + '_lmdb')
        if not os.path.exists(expected_source):
            raise RuntimeError('Expected LMDB source at: {}, '
                               'but it does not exist.'.format(expected_source))
        return expected_source
@property
def num_images(self):
self._db = LMDB()
self._db.open(self.source)
num_entries = self._db.num_entries()
self._db.close()
return num_entries
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
raise NotImplementedError
    def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
        raise NotImplementedError
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import json
import numpy as np
import uuid
try:
import cPickle
except ImportError:
import pickle as cPickle
from .imdb import imdb
from .voc_eval import voc_bbox_eval, voc_segm_eval
class pascal_voc(imdb):
def __init__(self, image_set, year, name='voc'):
imdb.__init__(self, name + '_' + year + '_' + image_set)
self._year = year
self._image_set = image_set
self._classes = ('__background__', # always index 0
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor')
self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
self._salt = str(uuid.uuid4())
self.config = {'cleanup': True, 'use_salt': True}
def _get_comp_id(self):
return '_' + self._salt if self.config['use_salt'] else ''
def _get_prefix(self, type='bbox'):
if type == 'bbox': return 'detections_'
elif type == 'segm': return 'segmentations_'
elif type == 'kpt': return 'keypoints_'
return ''
def _get_voc_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_voc_2007_test_<comp_id>_aeroplane.txt
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.txt'
if not os.path.exists(results_folder): os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__': continue
print('Writing {} VOC format bbox results'.format(cls))
filename = self._get_voc_results_T(output_dir).format(cls)
with open(filename, 'wt') as f:
ix = 0
for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]; ix += 1
                    if len(dets) == 0: continue
for k in range(dets.shape[0]):
f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
format(image_id, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1,
dets[k, 2] + 1, dets[k, 3] + 1))
    def _write_seg_results_file(self, all_boxes, all_masks, output_dir):
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__': continue
            print('Writing {} VOC results file'.format(cls))
            results_folder = os.path.join(output_dir, 'results', 'seg')
            if not os.path.exists(results_folder): os.makedirs(results_folder)
            det_filename = os.path.join(results_folder, cls + '_det.pkl')
            seg_filename = os.path.join(results_folder, cls + '_seg.pkl')
            with open(det_filename, 'wb') as f:
                cPickle.dump(all_boxes[cls_ind], f, cPickle.HIGHEST_PROTOCOL)
            with open(seg_filename, 'wb') as f:
                cPickle.dump(all_masks[cls_ind], f, cPickle.HIGHEST_PROTOCOL)
def _do_voc_bbox_eval(self, gt_recs, output_dir):
aps = []
# The PASCAL VOC metric changed in 2010
use_07_metric = True if int(self._year) < 2010 else False
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No') + '\n')
for i, cls in enumerate(self._classes):
if cls == '__background__':
continue
det_file = self._get_voc_results_T(output_dir).format(cls)
rec, prec, ap = voc_bbox_eval(det_file, gt_recs, cls,
IoU=0.5, use_07_metric=use_07_metric)
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
    def _do_voc_segm_eval(self, gt_recs, output_dir):
        # Define this as true according to SDS's evaluation protocol
        use_07_metric = True
        print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
        results_folder = os.path.join(output_dir, 'results', 'seg')
        for IoU in (0.5, 0.7):
            print('~~~~~~ Evaluation use min overlap = {} ~~~~~~'.format(IoU))
            aps = []
            for i, cls in enumerate(self.classes):
                if cls == '__background__':
                    continue
                det_file = os.path.join(results_folder, cls + '_det.pkl')
                seg_file = os.path.join(results_folder, cls + '_seg.pkl')
                ap = voc_segm_eval(det_file, seg_file, gt_recs, cls,
                                   IoU=IoU, use_07_metric=use_07_metric)
                aps += [ap]
                print('AP for {} = {:.2f}'.format(cls, ap))
            print('Mean AP@{} = {:.2f}'.format(IoU, np.mean(aps)))
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
self._write_voc_bbox_results(all_boxes, gt_recs, output_dir)
self._do_voc_bbox_eval(gt_recs, output_dir)
if self.config['cleanup']:
for cls in self._classes:
if cls == '__background__': continue
filename = self._get_voc_results_T(output_dir).format(cls)
os.remove(filename)
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
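# Usage sketch (illustrative only; ``all_boxes`` and ``gt_recs`` follow the
# structures produced by the test server):
#
#     db = pascal_voc('test', '2007')
#     db.competition_mode(on=False)  # salted filenames + cleanup
#     db.evaluate_detections(all_boxes, gt_recs, output_dir='/tmp/results')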
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import json
import numpy as np
import uuid
import cv2
try:
import cPickle
except ImportError:
import pickle as cPickle
from .imdb import imdb
from .voc_eval import voc_bbox_eval, voc_segm_eval
from lib.core.config import cfg
from lib.utils import boxes as box_utils
from lib.pycocotools.mask import encode as encode_masks
class taas(imdb):
def __init__(self, source):
imdb.__init__(self, 'taas')
self._classes = cfg.MODEL.CLASSES
self._source = source
self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
self._class_to_cat_id = self._class_to_ind
self._salt = str(uuid.uuid4())
self.config = {'cleanup': True, 'use_salt': True}
@property
def source(self):
        expected_source = self._source
        if not os.path.exists(expected_source):
            raise RuntimeError('Expected LMDB source at: {}, '
                               'but it does not exist.'.format(expected_source))
        return expected_source
##############################################
# #
# UTILS #
# #
##############################################
def _get_comp_id(self):
return '_' + self._salt if self.config['use_salt'] else ''
def _get_prefix(self, type='bbox'):
if type == 'bbox': return 'detections_'
elif type == 'segm': return 'segmentations_'
elif type == 'kpt': return 'keypoints_'
return ''
def _get_voc_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>_aeroplane.txt
if type == 'bbox':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.txt'
elif type == 'segm':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl'
else:
raise ValueError('Type of results can be either bbox or segm.')
if not os.path.exists(results_folder): os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_annotations_T(self, results_folder, type='bbox'):
# experiments/model_id/annotations/[GT]detections_taas_<comp_id>.json
filename = '[GT]_' + self._get_prefix(type) + self._name + '.json'
if not os.path.exists(results_folder): os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>.json
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json'
if not os.path.exists(results_folder): os.makedirs(results_folder)
return os.path.join(results_folder, filename)
##############################################
# #
# VOC #
# #
##############################################
def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__': continue
print('Writing {} VOC format bbox results'.format(cls))
filename = self._get_voc_results_T(output_dir).format(cls)
with open(filename, 'wt') as f:
ix = 0
for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]; ix += 1
                    if len(dets) == 0: continue
for k in range(dets.shape[0]):
f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
format(image_id, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1,
dets[k, 2] + 1, dets[k, 3] + 1))
def _write_voc_segm_results(self, all_boxes, all_masks, output_dir):
for cls_inds, cls in enumerate(self.classes):
if cls == '__background__': continue
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
with open(bbox_filename, 'wb') as f:
cPickle.dump(all_boxes[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
with open(segm_filename, 'wb') as f:
cPickle.dump(all_masks[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
def _do_voc_bbox_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self._classes):
if cls == '__background__': continue
det_file = self._get_voc_results_T(output_dir).format(cls)
rec, prec, ap = voc_bbox_eval(det_file, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric)
if ap > 0: aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
def _do_voc_segm_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.classes):
if cls == '__background__': continue
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
ap = voc_segm_eval(bbox_filename, segm_filename, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric)
if ap > 0: aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
##############################################
# #
# COCO #
# #
##############################################
def _get_coco_image_id(self, image_name):
image_id = image_name.split('_')[-1].split('.')[0]
        try: return int(image_id)
        except ValueError: return image_name
def _encode_coco_masks(self, masks, boxes, im_h, im_w):
num_pred = len(boxes)
assert len(masks) == num_pred
mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
M = masks[0].shape[0]
scale = (M + 2.0) / M
ref_boxes = box_utils.expand_boxes(boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
for i in range(num_pred):
ref_box = ref_boxes[i, :4]
mask = masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > cfg.TEST.BINARY_THRESH, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
mask_image[y1 : y2, x1 : x2, i] = \
mask[(y1 - ref_box[1]) : (y2 - ref_box[1]),
(x1 - ref_box[0]) : (x2 - ref_box[0])]
return encode_masks(mask_image)
def _write_coco_bbox_annotations(self, gt_recs, output_dir):
dataset = {}
# Build images
dataset['images'] = []
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width']})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__': continue
dataset['categories'].append({
'name': cls, 'id': self._class_to_ind[cls]})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']]})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='bbox')
with open(ann_file, 'w') as f: json.dump(dataset, f)
return ann_file
def _write_coco_segm_annotations(self, gt_recs, output_dir):
dataset = {}
# Build images
dataset['images'] = []
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width']})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__': continue
dataset['categories'].append({
'name': cls, 'id': self._class_to_ind[cls]})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'segmentation': {
'size': [rec['height'], rec['width']],
'counts': obj['mask']},
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']]})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='segm')
with open(ann_file, 'w') as f: json.dump(dataset, f)
return ann_file
def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs):
ix, results = 0, []
for image_name, rec in gt_recs.items():
dets = boxes[ix]; ix += 1
if isinstance(dets, list) and len(dets) == 0: continue
dets = dets.astype(np.float)
scores = dets[:, -1]
xs = dets[:, 0]
ys = dets[:, 1]
ws = dets[:, 2] - xs + 1
hs = dets[:, 3] - ys + 1
results.extend(
[{'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'bbox': [xs[k], ys[k], ws[k], hs[k]],
'score': scores[k]} for k in range(dets.shape[0])])
return results
def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs):
def filter_boxes(dets):
boxes = dets[:, :4]
ws = boxes[:, 2] - boxes[:, 0]
hs = boxes[:, 3] - boxes[:, 1]
keep = np.where((ws >= 1) & (hs >= 1))[0]
return keep
results = []
ix = 0
for image_name, rec in gt_recs.items():
dets = boxes[ix].astype(np.float)
msks = masks[ix]; ix += 1
            keep = filter_boxes(dets)
            im_h, im_w = rec['height'], rec['width']
            if len(keep) == 0: continue
            scores = dets[keep, -1]
            mask_encode = self._encode_coco_masks(
                msks[keep], dets[keep, :4], im_h, im_w)
for k in range(dets[keep].shape[0]):
rle = mask_encode[k]
if sys.version_info >= (3,0): rle['counts'] = rle['counts'].decode()
results.append({
'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'segmentation': rle,
'score': scores[k]})
return results
def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir)
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__': continue
print('Collecting {} results ({:d}/{:d})'.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_bbox_results_one_category(
all_boxes[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid: json.dump(results, fid)
return filename
def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir, type='segm')
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__': continue
print('Collecting {} results ({:d}/{:d})'.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_segm_results_one_category(
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid: json.dump(results, fid)
return filename
def _do_coco_bbox_eval(self, coco, res_file):
from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
coco_eval = COCOeval(coco, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _do_coco_segm_eval(self, coco, res_file):
from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
coco_eval = COCOeval(coco, coco_dt, 'segm')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _print_coco_eval_results(self, coco_eval):
IoU_lo_thresh = 0.5
IoU_hi_thresh = 0.95
def _get_thr_ind(coco_eval, thr):
ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
(coco_eval.params.iouThrs < thr + 1e-5))[0][0]
iou_thr = coco_eval.params.iouThrs[ind]
assert np.isclose(iou_thr, thr)
return ind
ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
# Precision has dims (iou, recall, cls, area range, max dets)
# Area range index 0: all area ranges
# Max dets index 2: 100 per image
precision = \
coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
ap_default = np.mean(precision[precision > -1])
        print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
              '~~~~'.format(IoU_lo_thresh, IoU_hi_thresh))
print('{:.1f}'.format(100 * ap_default))
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
# Minus 1 because of __background__
precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
ap = np.mean(precision[precision > -1])
print('{:.1f}'.format(100 * ap))
print('~~~~ Summary metrics ~~~~')
coco_eval.summarize()
##############################################
# #
# EVAL-API #
# #
##############################################
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
self._write_voc_bbox_results(all_boxes, gt_recs, output_dir)
            if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_bbox_eval(gt_recs, output_dir,
IoU=0.5, use_07_metric='2007' in protocol)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
self._do_voc_bbox_eval(gt_recs, output_dir,
IoU=0.7, use_07_metric='2007' in protocol)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
coco = COCO(cfg.TEST.JSON_FILE)
# We should override category id before writing results
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(
zip([c['name'] for c in cats], coco.getCatIds()))
else: coco = None
res_file = self._write_coco_bbox_results(all_boxes, gt_recs, output_dir)
            if 'wo' not in protocol:
if coco is None: coco = COCO(self._write_coco_bbox_annotations(gt_recs, output_dir))
self._do_coco_bbox_eval(coco, res_file)
def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
self._write_voc_segm_results(all_boxes, all_masks, output_dir)
            if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_segm_eval(gt_recs, output_dir,
IoU=0.5, use_07_metric='2007' in protocol)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
self._do_voc_segm_eval(gt_recs, output_dir,
IoU=0.7, use_07_metric='2007' in protocol)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
coco = COCO(cfg.TEST.JSON_FILE)
# We should override category id before writing results
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(
zip([c['name'] for c in cats], coco.getCatIds()))
else: coco = None
res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir)
            if 'wo' not in protocol:
if coco is None: coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir))
self._do_coco_segm_eval(coco, res_file)
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
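# The TEST.PROTOCOL string parsed above composes three switches. A sketch of
# plausible values, inferred from the substring checks in this file:
#
#     'voc2007'  -> VOC results + 11-point (2007) metric
#     'voc'      -> VOC results + area-under-PR-curve metric
#     'voc_wo'   -> write VOC results only ('wo' = without evaluation)
#     'coco'     -> COCO json results + COCO evaluation
#     'coco_wo'  -> write COCO json results only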
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle
except ImportError:
import pickle as cPickle
from lib.core.config import cfg
from lib.utils.mask_transform import mask_overlap
from lib.utils.boxes import expand_boxes
from lib.pycocotools.mask_utils import mask_rle2im
def voc_ap(rec, prec, use_07_metric=False):
""" ap = voc_ap(rec, prec, [use_07_metric])
Compute VOC AP given precision and recall.
    If use_07_metric is true, uses the
    VOC 07 11-point method (default: False).
"""
if use_07_metric:
# 11 point metric
ap = 0.
for t in np.arange(0., 1.1, 0.1):
if np.sum(rec >= t) == 0:
p = 0
else:
p = np.max(prec[rec >= t])
ap = ap + p / 11.
else:
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
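# Worked example (illustrative): with rec = [0.5, 1.0] and prec = [1.0, 0.5],
# the area-under-curve branch gives AP = 0.5 * 1.0 + 0.5 * 0.5 = 0.75, while
# the 11-point VOC07 branch gives (6 * 1.0 + 5 * 0.5) / 11 ~= 0.7727:
#
#     voc_ap(np.array([0.5, 1.0]), np.array([1.0, 0.5]))        # 0.75
#     voc_ap(np.array([0.5, 1.0]), np.array([1.0, 0.5]), True)  # ~0.7727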
def voc_bbox_eval(det_file, gt_recs, cls_name,
IoU=0.5, use_07_metric=False):
class_recs = {}
n_pos = 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
n_pos = n_pos + sum(~difficult)
class_recs[image_name] = {
'bbox': bbox,
'difficult': difficult,
'det': det
}
# read detections
with open(det_file, 'r') as f: lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# avoid IndexError if detecting nothing
if len(BB) == 0: return 0, 0, -1
# sort by confidence
sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind]
# go down dets and mark TPs and FPs
nd = len(image_ids)
tp = np.zeros(nd)
fp = np.zeros(nd)
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
ovmax = -np.inf
BBGT = R['bbox'].astype(float)
if BBGT.size > 0:
# compute overlaps
# intersection
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin + 1., 0.)
ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih
# union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
overlaps = inters / uni
ovmax = np.max(overlaps)
jmax = np.argmax(overlaps)
if ovmax > IoU:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[d] = 1.
R['det'][jmax] = 1
else:
fp[d] = 1.
else:
fp[d] = 1.
# compute precision recall
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(n_pos)
# avoid divide by zero in case the first detection matches a difficult
# ground truth
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric)
return rec, prec, ap
def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
IoU=0.5, use_07_metric=False):
# 0. Constants
M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH
scale = (M + 2.0) / M
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
# 1. Get bbox & mask ground truths
image_names, class_recs, n_pos = [], {}, 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
n_pos = n_pos + sum(~difficult)
class_recs[image_name] = {
'bbox': bbox,
'mask': mask,
'difficult': difficult,
'det': det
}
image_names.append(image_name)
# 2. Get predict pickle file for this class
with open(det_file, 'rb') as f: boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f: masks_pkl = cPickle.load(f)
# 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs)
box_num = 0
for im_i in range(num_images):
box_num += len(boxes_pkl[im_i])
    # avoid IndexError if detecting nothing (callers expect a single AP value)
    if box_num == 0: return -1
# 4. Re-organize all the predicted boxes
new_boxes = np.zeros((box_num, 5))
new_masks = np.zeros((box_num, M, M))
new_images = []
cnt = 0
for image_ind in range(num_images):
boxes = boxes_pkl[image_ind]
masks = masks_pkl[image_ind]
num_instance = len(boxes)
for box_ind in range(num_instance):
new_boxes[cnt] = boxes[box_ind]
new_masks[cnt] = masks[box_ind]
new_images.append(image_names[image_ind])
cnt += 1
# 5. Rearrange boxes according to their scores
seg_scores = new_boxes[:, -1]
keep_inds = np.argsort(-seg_scores)
new_boxes = new_boxes[keep_inds, :]
new_masks = new_masks[keep_inds, :, :]
num_pred = new_boxes.shape[0]
# 6. Calculate t/f positive
fp = np.zeros((num_pred, 1))
tp = np.zeros((num_pred, 1))
ref_boxes = expand_boxes(new_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
for i in range(num_pred):
image_name = new_images[keep_inds[i]]
if image_name not in class_recs:
print('Warning: {} does not exist in the ground-truths.'.format(image_name))
fp[i] = 1
continue
R = class_recs[image_name]
im_h, im_w = \
gt_recs[image_name]['height'], \
gt_recs[image_name]['width']
# decode mask
ref_box = ref_boxes[i, :4]
mask = new_masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > binary_thresh, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])]
# calculate max region overlap
ovmax = -1; jmax = -1
for j in range(len(R['det'])):
gt_mask_bound = R['bbox'][j].astype(int)
pred_mask_bound = new_boxes[i, :4].astype(int)
crop_mask = R['mask'][j][gt_mask_bound[1] : gt_mask_bound[3] + 1,
gt_mask_bound[0] : gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
if ov > ovmax:
ovmax = ov
jmax = j
if ovmax > IoU:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[i] = 1.
R['det'][jmax] = 1
else:
fp[i] = 1.
else:
fp[i] = 1
# 7. Calculate precision
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(n_pos)
# avoid divide by zero in case the first matches a difficult gt
prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric=use_07_metric)
return ap
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.layers.proposal_layer import ProposalLayer
from lib.faster_rcnn.layers.proposal_target_layer import ProposalTargetLayer
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import numpy as np
from multiprocessing import Process
from lib.core.config import cfg
from lib.utils.blob import im_list_to_blob
class BlobFetcher(Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self.Q1_in = self.Q2_in = self.Q_out = None
self.daemon = True
def get(self, Q_in):
processed_ims = []; ims_info = []; all_boxes = []
for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = Q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[0:2]) + [im_scale])
# Encode boxes by adding the idx of images
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), dtype=np.float32)
im_boxes[:, 0:gt_boxes.shape[1]] = gt_boxes
im_boxes[:, -1] = ix
all_boxes.append(im_boxes)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, dtype=np.float32),
'gt_boxes': np.concatenate(all_boxes, axis=0),
}
    def run(self):
        while True:
            if self.Q1_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
                self.Q_out.put(self.get(self.Q1_in))
            elif self.Q2_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
                self.Q_out.put(self.get(self.Q2_in))
            else:
                # Avoid busy-waiting while neither queue holds a full batch
                time.sleep(0.001)
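# Layout of the returned batch (inferred from ``get`` above): 'ims_info' is an
# (IMS_PER_BATCH, 3) array of (height, width, scale) per image, and each row
# of 'gt_boxes' carries the original box columns with the in-batch image index
# appended as the last column, so boxes can be matched back to their image.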
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import pprint
from multiprocessing import Queue
import dragon.core.mpi as mpi
from lib.core.config import cfg
import lib.utils.logger as logger
from lib.faster_rcnn.data.data_reader import DataReader
from lib.faster_rcnn.data.data_transformer import DataTransformer
from lib.faster_rcnn.data.blob_fetcher import BlobFetcher
class DataBatch(object):
"""DataBatch aims to prefetch data by ``Triple-Buffering``.
It takes full advantages of the Process/Thread of Python,
which provides remarkable I/O speed up for scalable distributed training.
"""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
source : str
The path of database.
multiple_nodes: boolean
Whether to split data for multiple parallel nodes. Default is ``False``.
shuffle : boolean
Whether to shuffle the data. Default is ``False``.
num_chunks : int
The number of chunks to split. Default is ``2048``.
        chunk_size : int
            The size (MB) of each chunk. Default is ``-1`` (refer to ``num_chunks``).
batch_size : int
The size of a training batch.
partition : boolean
Whether to partition batch. Default is ``False``.
prefetch : int
The prefetch count. Default is ``5``.
"""
super(DataBatch, self).__init__()
# Init mpi
global_rank = 0; local_rank = 0; group_size = 1
if mpi.Is_Init():
idx, group = mpi.AllowParallel()
if idx != -1: # DataParallel
global_rank = mpi.Rank()
group_size = len(group)
for i, node in enumerate(group):
if global_rank == node: local_rank = i
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
        # I/O-aware policy
if self._num_transformers == -1:
self._num_transformers = 2
# Add 1 transformer for color augmentation
if cfg.TRAIN.COLOR_JITTERING:
self._num_transformers += 1
self._num_transformers = min(self._num_transformers, self._max_transformers)
self._batch_size = kwargs.get('batch_size', 100)
self._partition = kwargs.get('partition', False)
if self._partition:
self._batch_size = int(self._batch_size / kwargs['group_size'])
# Init queues
self.Q_level_1 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q1_level_2 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q2_level_2 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q_level_3 = Queue(self._prefetch * self._num_readers)
# Init readers
self._readers = []
for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].Q_out = self.Q_level_1
for i in range(self._num_readers):
num_parts = self._num_readers
part_idx = i
if self._readers[i]._multiple_nodes or \
self._readers[i]._use_shuffle:
num_parts *= group_size
part_idx += local_rank * self._num_readers
self._readers[i]._num_parts = num_parts
self._readers[i]._part_idx = part_idx
self._readers[i]._random_seed += part_idx
self._readers[i].start()
time.sleep(0.1)
# Init transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._random_seed += (i + local_rank * self._num_transformers)
transformer.Q_in = self.Q_level_1
transformer.Q1_out = self.Q1_level_2
transformer.Q2_out = self.Q2_level_2
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Init blob fetchers
self._fetchers = []
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.Q1_in = self.Q1_level_2
fetcher.Q2_in = self.Q2_level_2
fetcher.Q_out = self.Q_level_3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Echo on the chief node only
if local_rank == 0: self.echo()
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate(self._fetchers)
logger.info('Terminating BlobFetcher ......')
terminate(self._transformers)
logger.info('Terminating DataTransformer ......')
terminate(self._readers)
logger.info('Terminating DataReader ......')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q_level_3.get()
def echo(self):
"""Print I/O Information.
Returns
-------
None
"""
print('---------------------------------------------------------')
print('BatchFetcher({} Processes), Using config:'.format(
self._num_readers + self._num_transformers + self._num_fetchers))
params = {'queue_size': self._prefetch,
'n_readers': self._num_readers,
'n_transformers': self._num_transformers,
'n_fetchers': self._num_fetchers}
pprint.pprint(params)
print('---------------------------------------------------------')
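# A minimal usage sketch (hypothetical arguments; the real ones are supplied
# by DataLayer in data_layer.py):
#   batch = DataBatch(source='/data/voc_train_lmdb',
#                     classes=('__background__', 'person'),
#                     shuffle=True, batch_size=2)
#   blobs = batch.get()  # blocks until a prefetched batch is ready
# The pipeline is: DataReader -> Q_level_1 -> DataTransformer ->
# {Q1,Q2}_level_2 (split by aspect ratio) -> BlobFetcher -> Q_level_3.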
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
import numpy.random as npr
from multiprocessing import Process
import dragon.config as config
from dragon.tools.db import LMDB
class DataReader(Process):
"""DataReader is deployed to queue encoded str from `LMDB`_.
It is supported to adaptively partition and shuffle records over all distributed nodes.
"""
def __init__(self, **kwargs):
"""Construct a ``DataReader``.
Parameters
----------
source : str
The path of database.
multiple_nodes: boolean
Whether to split data for multiple parallel nodes. Default is ``False``.
shuffle : boolean
Whether to shuffle the data. Default is ``False``.
num_chunks : int
The number of chunks to split. Default is ``2048``.
chunk_size : int
The size (MB) of each chunk. Default is ``-1`` (refer to ``num_chunks``).
"""
super(DataReader, self).__init__()
self._source = kwargs.get('source', '')
self._multiple_nodes = kwargs.get('multiple_nodes', False)
self._use_shuffle = kwargs.get('shuffle', False)
self._use_instance_chunk = kwargs.get('instance_chunk', False)
self._num_chunks = kwargs.get('num_chunks', 2048)
self._chunk_size = kwargs.get('chunk_size', -1)
self._part_idx, self._num_parts = 0, 1
self._cur_idx, self._cur_chunk_idx = 0, 0
self._random_seed = config.GetRandomSeed()
self.Q_out = None
self.daemon = True
def element(self):
"""Get the value of current record.
Returns
-------
str
The encoded str.
"""
return self._db.value()
def redirect(self, target_idx):
"""Redirect to the target position.
Parameters
----------
target_idx : int
The key of instance in ``LMDB``.
Returns
-------
None
Notes
-----
The redirection reopens the ``LMDB``.
You can drop caches by ``echo 3 > /proc/sys/vm/drop_caches``.
This helps avoid getting stuck when ``Database Size`` >> ``RAM Size``.
"""
self._db.close()
self._db.open(self._source)
self._cur_idx = target_idx
self._db.set(str(self._cur_idx).zfill(self._zfill))
def reset(self):
"""Reset the cursor and environment.
Returns
-------
None
"""
if self._multiple_nodes or self._use_shuffle:
if self._use_shuffle: self._perm = npr.permutation(self._num_shuffle_parts)
self._cur_chunk_idx = 0
self._start_idx = int(self._part_idx * self._num_shuffle_parts + self._perm[self._cur_chunk_idx])
self._start_idx = int(self._start_idx * self._chunk_size)
if self._start_idx >= self._num_entries: self.next_chunk()
self._end_idx = self._start_idx + self._chunk_size
self._end_idx = min(self._num_entries, self._end_idx)
else:
self._start_idx = 0
self._end_idx = self._num_entries
self.redirect(self._start_idx)
def next_record(self):
"""Step the cursor of records.
Returns
-------
None
"""
self._cur_idx += 1
self._db.next()
def next_chunk(self):
"""Step the cursor of shuffling chunks.
Returns
-------
None
"""
self._cur_chunk_idx += 1
if self._cur_chunk_idx >= self._num_shuffle_parts: self.reset()
else:
self._start_idx = self._part_idx * self._num_shuffle_parts + self._perm[self._cur_chunk_idx]
self._start_idx = self._start_idx * self._chunk_size
if self._start_idx >= self._num_entries: self.next_chunk()
else:
self._end_idx = self._start_idx + self._chunk_size
self._end_idx = min(self._num_entries, self._end_idx)
self.redirect(self._start_idx)
def run(self):
"""Start the process.
Returns
-------
None
"""
# fix seed
npr.seed(self._random_seed)
# init db
self._db = LMDB()
self._db.open(self._source)
self._zfill = self._db.zfill()
self._num_entries = self._db.num_entries()
self._epoch_size = int(self._num_entries / self._num_parts + 1)
if self._use_shuffle:
if self._chunk_size == 1:
# Each chunk has at most 1 record [For Fully Shuffle]
self._chunk_size, self._num_shuffle_parts = \
1, int(self._num_entries / self._num_parts) + 1
else:
if self._chunk_size == -1:
# Search an optimal chunk size by chunks [For Chunk Shuffle]
max_chunk_size = self._db._total_size / (self._num_chunks * (1 << 20))
min_chunk_size = 1
while min_chunk_size * 2 < max_chunk_size: min_chunk_size *= 2
self._chunk_size = min_chunk_size
self._num_shuffle_parts = int(math.ceil(self._db._total_size * 1.1 /
(self._num_parts * self._chunk_size << 20)))
self._chunk_size = int(self._num_entries / self._num_shuffle_parts / self._num_parts + 1)
limit = (self._num_parts - 0.5) * self._num_shuffle_parts * self._chunk_size
if self._num_entries <= limit:
# Roll back to fully shuffle
self._chunk_size, self._num_shuffle_parts = \
1, int(self._num_entries / self._num_parts) + 1
else:
# Each chunk has at most K records [For Multiple Nodes]
# Note that if ``shuffle`` and ``multiple_nodes`` are both ``False``,
# ``chunk_size`` and ``num_shuffle_parts`` are meaningless
self._chunk_size = int(self._num_entries / self._num_parts) + 1
self._num_shuffle_parts = 1
self._perm = np.arange(self._num_shuffle_parts)
# Init env
self.reset()
# Run!
while True:
self.Q_out.put(self.element())
self.next_record()
if self._cur_idx >= self._end_idx:
if self._multiple_nodes or \
self._use_shuffle: self.next_chunk()
else: self.reset()
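# A worked example of the chunk-shuffle math above (illustrative numbers):
# with num_entries=10000, num_parts=2, and num_shuffle_parts resolved to 50,
# chunk_size = int(10000 / 50 / 2 + 1) = 101 records per chunk, and
# limit = (2 - 0.5) * 50 * 101 = 7575 < 10000, so chunk shuffle is kept;
# each reader then visits its 50 chunks in a freshly permuted order per
# epoch, reading ~101 consecutive records per chunk (sequential on disk).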
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Process
import numpy as np
import numpy.random as npr
try:
import cv2
except ImportError as e:
print('Failed to import cv2. Error: {0}'.format(str(e)))
try:
import PIL.Image
except ImportError as e:
print('Failed to import PIL. Error: {0}'.format(str(e)))
from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.utils.blob import prep_im_for_blob
import lib.utils.logger as logger
class DataTransformer(Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._random_seed = cfg.RNG_SEED
self._use_flipped = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF
self._classes = kwargs.get('classes', ('__background__',))
self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._queues = []
self.Q_in = self.Q1_out = self.Q2_out = None
self.daemon = True
def make_roidb(self, ann_datum, im_scale, flip=False, offsets=None):
annotations = ann_datum.annotation
n_objects = 0
if not self._use_diff:
for ann in annotations:
if not ann.difficult: n_objects += 1
else: n_objects = len(annotations)
roidb = {
'width': ann_datum.datum.width,
'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32),
'boxes': np.zeros((n_objects, 4), dtype=np.float32),
}
ix = 0
for ann in annotations:
if not self._use_diff and ann.difficult: continue
roidb['boxes'][ix, :] = [
max(0, ann.x1), max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1)]
roidb['gt_classes'][ix] = self._class_to_ind[ann.name]
ix += 1
if flip: roidb['boxes'] = _flip_boxes(roidb['boxes'], roidb['width'])
roidb['boxes'] *= im_scale
if offsets is not None:
roidb['boxes'][:, 0::2] += offsets[0]
roidb['boxes'][:, 1::2] += offsets[1]
roidb['boxes'][:, :] = np.minimum(
np.maximum(roidb['boxes'][:, :], 0),
[offsets[2][1] - 1, offsets[2][0] - 1] * 2)
return roidb
@classmethod
def get_image(cls, serialized):
datum = pb.AnnotatedDatum()
datum.ParseFromString(serialized)
datum = datum.datum
im = np.fromstring(datum.data, np.uint8)
return cv2.imdecode(im, -1) if datum.encoded is True else \
im.reshape((datum.height, datum.width, datum.channels))
@classmethod
def get_annotations(cls, serialized):
datum = pb.AnnotatedDatum()
datum.ParseFromString(serialized)
filename = datum.filename
annotations = datum.annotation
objects = []
for ix, ann in enumerate(annotations):
objects.append({
'name': ann.name,
'difficult': int(ann.difficult),
'bbox': [ann.x1, ann.y1, ann.x2, ann.y2],
'mask': ann.mask,
})
return filename, objects
def get(self, serialized):
datum = pb.AnnotatedDatum()
datum.ParseFromString(serialized)
im_datum = datum.datum
im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True: im = cv2.imdecode(im, -1)
else: im = im.reshape((im_datum.height, im_datum.width, im_datum.channels))
# Scale
scale_indices = npr.randint(0, high=len(cfg.TRAIN.SCALES))
target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE)
# Crop or Pad
offsets = None
if cfg.TRAIN.MAX_SIZE > 0:
if jitter != 1.0:
# To a rectangle (scale, max_size)
target_size = (np.array(im.shape[0:2]) / jitter).astype(np.int)
im, offsets = _get_image_with_target_size(target_size, im)
else:
# To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im)
# Flip
flip = False
if self._use_flipped:
if npr.randint(0, 2) > 0:
im = im[:, ::-1, :]
flip = True
# Datum -> RoIDB
roidb = self.make_roidb(datum, im_scale, flip, offsets)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(roidb['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, 0:4], gt_boxes[:, 4] = roidb['boxes'], roidb['gt_classes']
return im, im_scale, gt_boxes
def run(self):
npr.seed(self._random_seed)
while True:
serialized = self.Q_in.get()
data = self.get(serialized)
# Ensure there is at least one ground-truth box
if len(data[2]) < 1: continue
aspect_ratio = float(data[0].shape[0]) / data[0].shape[1]
if aspect_ratio > 1.0: self.Q1_out.put(data)
else: self.Q2_out.put(data)
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
oldx1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - oldx2 - 1
flip_boxes[:, 2] = width - oldx1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
def _get_image_with_target_size(target_size, im):
im_shape = list(im.shape)
width_diff = target_size[1] - im_shape[1]
offset_crop_width = max(-width_diff // 2, 0)
offset_pad_width = max(width_diff // 2, 0)
height_diff = target_size[0] - im_shape[0]
offset_crop_height = max(-height_diff // 2, 0)
offset_pad_height = max(height_diff // 2, 0)
im_shape[0 : 2] = target_size
new_im = np.empty(im_shape, dtype=im.dtype)
new_im.fill(127)
new_im[offset_pad_height:offset_pad_height + im.shape[0],
offset_pad_width:offset_pad_width + im.shape[1]] = \
im[offset_crop_height:offset_crop_height + target_size[0],
offset_crop_width:offset_crop_width + target_size[1]]
return new_im, (offset_pad_width - offset_crop_width,
offset_pad_height - offset_crop_height, target_size)
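# A small worked example of the crop/pad offsets above: for
# target_size=(600, 600) and an image of shape (500, 700, 3),
# height_diff=+100 pads 50 rows (offset_pad_height=50) and
# width_diff=-100 crops 50 columns (offset_crop_width=50), so the
# returned offsets are (0 - 50, 50 - 0, (600, 600)) = (-50, 50, (600, 600)),
# which make_roidb() uses to shift and clip the ground-truth boxes.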
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py>
#
# ------------------------------------------------------------
import numpy as np
# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# >> anchors
#
# anchors =
#
# -83 -39 100 56
# -175 -87 192 104
# -359 -183 376 200
# -55 -55 72 72
# -119 -119 136 136
# -247 -247 264 264
# -35 -79 52 96
# -79 -167 96 184
# -167 -343 184 360
#array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6)):
"""
Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, 15, 15) window.
"""
base_anchor = np.array([1, 1, base_size, base_size]) - 1
ratio_anchors = _ratio_enum(base_anchor, ratios)
anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
for i in range(ratio_anchors.shape[0])])
return anchors
def generate_anchors_v2(stride=16, ratios=(0.5, 1, 2),
sizes=(32, 64, 128, 256, 512)):
"""
Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
are centered on stride / 2, have (approximate) sqrt areas of the specified
sizes, and aspect ratios as given.
"""
return generate_anchors(stride, ratios,
np.array(sizes, dtype=np.float) / stride)
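# For example (a sanity check consistent with the table above):
# generate_anchors_v2(stride=16, ratios=(0.5, 1, 2), sizes=(32,)) is
# generate_anchors(16, (0.5, 1, 2), scales=[2.]), i.e. 3 anchors of
# roughly 32**2 area centered on the (0, 0, 15, 15) reference window.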
def _whctrs(anchor):
"""
Return width, height, x center, and y center for an anchor (window).
"""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
y_ctr = anchor[1] + 0.5 * (h - 1)
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
y_ctr + 0.5 * (hs - 1)))
return anchors
def _ratio_enum(anchor, ratios):
"""
Enumerate a set of anchors for each aspect ratio wrt an anchor.
"""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
ws = np.round(np.sqrt(size_ratios))
hs = np.round(ws * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _scale_enum(anchor, scales):
"""
Enumerate a set of anchors for each scale wrt an anchor.
"""
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
if __name__ == '__main__':
print(generate_anchors())
\ No newline at end of file
# --------------------------------------------------------
# Mask R-CNN @ Detectron
# Copyright (c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.blob import to_tensor
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
# C4 backbone takes the first stride
self.scales, self.stride, self.ratios = \
cfg.RPN.SCALES, \
cfg.RPN.STRIDES[0], \
cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \
.format(num_images, len(gt_boxes_wide)))
# Generate proposals from shifted anchors
height, width = features[0].shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
all_anchors = (self.base_anchors.reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
# label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] # height
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels first so that positive labels can clobber them
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels last so that negative labels can clobber positives
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], 0:4])
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
all_labels[ix, inds_inside] = labels # label
all_bbox_targets[ix, inds_inside] = bbox_targets
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
# labels
labels = all_labels.reshape(
(num_images, height, width, A)).transpose(0, 3, 1, 2)
labels = labels.reshape((num_images, total_anchors))
# bbox_targets
bbox_targets = all_bbox_targets.reshape(
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2)
# bbox_inside_weights
bbox_inside_weights = all_bbox_inside_weights.reshape(
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2)
# bbox_outside_weights
bbox_outside_weights = all_bbox_outside_weights.reshape(
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2)
return {
'labels': to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
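# For instance, with num_images=2 and packed rows of
# [x1, y1, x2, y2, cls, batch_idx], _dismantle_gt_boxes returns
# [rows with batch_idx == 0, rows with batch_idx == 1], matching the
# per-image loop in forward().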
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data.data_batch import DataBatch
class DataLayer(torch.nn.Module):
def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'source': database.source,
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'multiple_nodes': True,
'chunk_size': 1, # Valid if using shuffle
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
def forward(self):
# Get a mini-batch from the Queue
blobs = self.data_batch.get()
# Zero-Copy from numpy
blobs['data'] = torch.from_numpy(blobs['data'])
# Switch the data to Device
blobs['data'] = blobs['data'].cuda(cfg.GPU_ID)
return blobs
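# Note: batch_size is set to IMS_PER_BATCH * 2 above, presumably because
# DataTransformer splits images into two level-2 queues by aspect ratio
# (portrait vs. landscape), and the doubled budget keeps BlobFetcher from
# starving while it gathers IMS_PER_BATCH same-orientation images.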
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.nms.nms_wrapper import nms
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils.bbox_transform import bbox_transform_inv, clip_boxes
class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales, self.stride, self.ratios = \
cfg.RPN.SCALES, cfg.RPN.STRIDES[0], cfg.RPN.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
# Generate proposals from shifted anchors
height, width = cls_prob.shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
anchors = self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4))
# Prepare for the outputs
batch_rois = []
# scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2])
keep = _filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# 4. Apply NMS (e.g. threshold = 0.7)
# 5. Take post_nms_topN (e.g. 300)
# 6. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN': return rpn_rois
else: return [to_tensor(rpn_rois)]
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size."""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
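# A minimal standalone sketch of the pre-NMS top-K trick used in forward()
# (hypothetical sizes):
#   scores = np.random.rand(200000)
#   k = 6000
#   inds = np.argpartition(-scores, k)[:k]   # unsorted top-k, O(n)
#   order = inds[np.argsort(-scores[inds])]  # sort only those k
# This matches np.argsort(-scores)[:k] but avoids a full O(n log n) sort.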
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
batch_outputs = {key: [] for key in keys}
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return {
'rois': [to_tensor(batch_outputs['rois'])],
'labels': to_tensor(batch_outputs['labels']),
'bbox_targets': to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': to_tensor(batch_outputs['bbox_outside_weights']),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Generate a random sample of RoIs comprising foreground and background examples."""
overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _dismantle_gt_boxes(gt_boxes, num_images):
return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]] \
for ix in range(num_images)]
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
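# A worked example of the sampling budget above, assuming the common config
# TRAIN.BATCH_SIZE=128 and TRAIN.FG_FRACTION=0.25: at most 32 foreground
# RoIs (max IoU >= FG_THRESH) are kept per image, the remaining 96 slots
# are filled with background RoIs whose max IoU lies in
# [BG_THRESH_LO, BG_THRESH_HI), and background labels are clamped to 0.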
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
import cPickle
except ImportError:
import pickle as cPickle
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.image import scale_image
from lib.utils.bbox_transform import clip_boxes, bbox_transform_inv
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer
from lib.utils.blob import im_list_to_blob, to_array
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales.
"""
# Prepare images
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
blobs['data'] = torch.from_numpy(blobs['data']).cuda(cfg.GPU_ID)
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
batch_rois = to_array(outputs['rois'])
batch_scores = to_array(outputs['cls_prob'])
batch_deltas = to_array(outputs['bbox_pred'])
batch_boxes = bbox_transform_inv(
batch_rois[:, 1:5], batch_deltas, cfg.BBOX_REG_WEIGHTS)
scores_wide = []; boxes_wide = []
for im_idx in range(len(ims)):
indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0]
boxes = batch_boxes[indices]
boxes /= ims_scale[im_idx]
clip_boxes(boxes, raw_image.shape)
scores_wide.append(batch_scores[indices])
boxes_wide.append(boxes)
return (np.vstack(scores_wide), np.vstack(boxes_wide)) \
if len(scores_wide) > 1 else (scores_wide[0], boxes_wide[0])
def test_net(detector, server):
classes, num_images, num_classes = \
server.classes, server.num_images, server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect' : Timer(), 'misc' : Timer()}
for i in range(num_images):
image_id, raw_image = server.get_image()
_t['im_detect'].tic()
scores, boxes = im_detect(detector, raw_image)
_t['im_detect'].toc()
_t['misc'].tic()
boxes_this_image = [[]]
for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4]
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).\
astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(cls_dets, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA)
else:
keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
cls_dets = cls_dets[keep, :]
all_boxes[j][i] = cls_dets
boxes_this_image.append(cls_dets)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_image, classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_id))
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
.format(i + 1, num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections')
server.evaluate_detections(all_boxes)
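# A minimal usage sketch (hypothetical objects: `detector` is a built
# network exposing forward(inputs=...), 'demo.jpg' is any test image):
#   import cv2
#   im = cv2.imread('demo.jpg')
#   scores, boxes = im_detect(detector, im)
#   # scores: (n, num_classes), boxes: (n, 4 * num_classes)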
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.fpn.layers.anchor_target_layer import AnchorTargetLayer
from lib.fpn.layers.proposal_layer import ProposalLayer
from lib.fpn.layers.proposal_target_layer import ProposalTargetLayer
\ No newline at end of file
# --------------------------------------------------------
# Mask R-CNN @ Detectron
# Copyright (c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
import lib.utils.logger as logger
from lib.utils.blob import to_tensor
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
self.scales, self.strides, self.ratios = \
cfg.RPN.SCALES, \
cfg.RPN.STRIDES, \
cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides)))
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size = self.strides[i]
scale = self.scales[i]
if not isinstance(scale, list): scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \
.format(num_images, len(gt_boxes_wide)))
# Generate proposals from shifted anchors
all_anchors = []; total_anchors = 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors)
total_anchors += anchors.shape[0]
all_anchors = np.vstack(all_anchors)
# label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] # height
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], 0:4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
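# For a sense of scale (illustrative, assuming strides (4, 8, 16, 32, 64),
# one scale per level, 3 aspect ratios, and a ~800x800 input): total_anchors
# sums A * K over levels, roughly 3 * (200**2 + 100**2 + 50**2 + 25**2 +
# 13**2) ~= 160k anchors per image, which is why labels are subsampled to
# RPN_BATCHSIZE before computing losses.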
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.utils import logger
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform_inv, clip_boxes
from lib.faster_rcnn.generate_anchors import generate_anchors
class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box.
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales, self.strides, self.ratios = \
cfg.RPN.SCALES, cfg.RPN.STRIDES, cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides)))
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size = self.strides[i]
scale = self.scales[i]
if not isinstance(scale, list): scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def generate_grid_anchors(self, features):
# Generate proposals from shifted anchors
anchors_wide = []
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
anchors_wide.append(anchors)
return np.vstack(anchors_wide)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
all_anchors = self.generate_grid_anchors(features) # [n, 4]
if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images:
logger.fatal('Incorrect num of images: {}'.format(num_images))
# Prepare for the outputs
batch_rois = []
batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4]
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
keep = _filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# 4. Apply NMS (e.g. threshold = 0.7)
# 5. Take post_nms_topN (e.g. 300)
# 6. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level)
all_rois = []
for i in range(K):
lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling
all_rois.append(to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else:
all_rois.append(to_tensor(rpn_rois[lv_indices]))
return all_rois
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size.
"""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""Determine which FPN level each RoI in a set of RoIs should map to based
on the heuristic in the FPN paper.
"""
if len(rois) == 0: return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
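# A worked example of the FPN assignment heuristic above: with the defaults
# s0=224 and lvl0=4, an RoI of sqrt-area s=112 maps to
# floor(4 + log2(112/224)) = 3, s=224 maps to level 4, and s=448 to
# level 5, before clipping to [k_min, k_max].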
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
self.fake_outputs = {
'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32),
'labels': np.array([-1], dtype=np.float32),
'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
}
def _map_rois(self, inputs, fake_outputs, outputs, keys, levels):
f = lambda a, b, indices: a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(f(inputs[i], fake_outputs[key], inds))
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
outputs = {key: [] for key in keys}
batch_outputs = {key: [] for key in keys}
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
# Sample rois with labels & bbox targets
labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
self._map_rois([batch_outputs[key] for key in keys], self.fake_outputs, outputs, keys, lvs_indices)
return {
'rois': [to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': to_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
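# A quick sketch of the expansion above: with num_classes=3, a single
# compact row [2., tx, ty, tw, th] (class 2) fills only columns 8:12
# (= 4 * 2 : 4 * 2 + 4) of the N x 12 blob:
#
#   >>> data = np.array([[2., .1, .2, .3, .4]], dtype=np.float32)
#   >>> t, w = _get_bbox_regression_labels(data, 3)
#   >>> t[0, 8:12]  # -> [0.1, 0.2, 0.3, 0.4]
#   >>> w[0, 8:12]  # -> [1., 1., 1., 1.]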
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""Determine which FPN level each RoI in a set of RoIs should map to based
on the heuristic in the FPN paper.
"""
if len(rois) == 0: return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
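# A worked example of the heuristic above, assuming the default canonical
# scale (224) and level (4): a 112 x 112 RoI lands on P3, since
# floor(4 + log2(112 / 224)) = 3, while a 448 x 448 RoI lands on P5:
#
#   >>> rois = np.array([[0, 0, 0, 111, 111],
#   ...                  [0, 0, 0, 447, 447]], dtype=np.float32)
#   >>> _map_rois_to_fpn_levels(rois, 2, 5)  # -> [3., 5.]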
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Generate a random sample of RoIs comprising foreground and background
examples.
"""
# overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
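# Budget sketch: with the common settings TRAIN.BATCH_SIZE=128 and
# TRAIN.FG_FRACTION=0.25, at most round(0.25 * 128) = 32 foreground RoIs
# are sampled per image; the remaining 96 slots fall back to background
# RoIs, capped by the number of available candidates.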
def _dismantle_gt_boxes(gt_boxes, num_images):
return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]] \
for ix in range(num_images)]
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
f = lambda a, b, indices: a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(f(inputs[i], fake_outputs[key], inds))
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# Import custom modules
from lib.modeling.base import Bootstarp
from lib.modeling.fpn import FPN
from lib.modeling.rpn import RPN
from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.retinanet import RetinaNet
from lib.modeling.ssd import SSD
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.modeling.base import conv1x1, conv3x3, bn
class WideResBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out, stride=1, downsample=None):
super(WideResBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = bn(dim_out, eps=1e-3)
self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = bn(dim_out, eps=1e-3)
self.downsample = downsample
self.relu = torch.nn.ReLU(inplace=True)
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(residual)
out += residual
out = self.relu(out)
return out
class InceptionBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out):
super(InceptionBlock, self).__init__()
self.conv1 = conv1x1(dim_in, dim_out)
self.bn1 = bn(dim_out, eps=1e-3)
self.conv2 = conv3x3(dim_out, dim_out // 2)
self.bn2 = bn(dim_out // 2, eps=1e-3)
self.conv3a = conv3x3(dim_out // 2, dim_out)
self.bn3a = bn(dim_out, eps=1e-3)
self.conv3b = conv3x3(dim_out, dim_out)
self.bn3b = bn(dim_out, eps=1e-3)
self.conv4 = conv3x3(dim_out * 3, dim_out)
self.bn4 = bn(dim_out, eps=1e-3)
self.relu = torch.nn.ReLU(inplace=True)
def forward(self, x):
residual = x
out = self.conv1(x)
out_1x1 = self.bn1(out)
out_1x1 = self.relu(out_1x1)
out = self.conv2(out_1x1)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3a(out)
out_3x3_a = self.bn3a(out)
out_3x3_a = self.relu(out_3x3_a)
out = self.conv3b(out_1x1)
out_3x3_b = self.bn3b(out)
out_3x3_b = self.relu(out_3x3_b)
out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], dim=1)
out = self.conv4(out)
out = self.bn4(out)
out += residual
out = self.relu(out)
return out
class AirNet(torch.nn.Module):
def __init__(self, blocks, num_stages):
super(AirNet, self).__init__()
self.dim_in, filters = 64, [64, 128, 256, 384]
self.feature_dims = filters[1:num_stages - 1]
self.conv1 = torch.nn.Conv2d(
3, 64,
kernel_size=7,
stride=2,
padding=3,
bias=False,
)
self.bn1 = bn(self.dim_in, eps=1e-3)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
kernel_size=2,
stride=2,
padding=0,
ceil_mode=True,
)
self.layer1 = self.make_blocks(filters[0], blocks[0])
self.layer2 = self.make_blocks(filters[1], blocks[1], 2)
if num_stages >= 4: self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5: self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
self.reset_parameters()
def reset_parameters(self):
        # Kaiming initialization
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
                    # a=1 fixes the gain at 1 (for the [-127, 127] range),
                    # i.e. a Xavier-style uniform initialization
                    a=1,
                )
def make_blocks(self, dim_out, blocks, stride=1):
downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride),
bn(dim_out, eps=1e-3),
)
layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
self.dim_in = dim_out
for i in range(1, len(blocks)):
if blocks[i] == 'r':
layers.append(WideResBlock(dim_out, dim_out))
elif blocks[i] == 'i':
layers.append(InceptionBlock(dim_out, dim_out))
else:
raise ValueError('Unknown block flag: ' + blocks[i])
return torch.nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
outputs = [self.layer2(x)]
if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])]
return outputs
def airnet(num_stages):
blocks = (
('r', 'r'), # conv2
('r', 'i'), # conv3
('r', 'i'), # conv4
('r', 'i'), # conv5
)
return AirNet(blocks, num_stages)
def make_airnet_3b(): return airnet(3)
def make_airnet_4b(): return airnet(4)
def make_airnet_5b(): return airnet(5)
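# Usage sketch: make_airnet_5b() builds the full 5-stage AirNet whose
# forward() returns the [layer2, layer3, layer4] feature maps, matching
# feature_dims = [128, 256, 384]; the 'r'/'i' flags above select a
# WideResBlock or an InceptionBlock for each unit after the first.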
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
class Bootstarp(torch.nn.Module):
"""Extended operator to process the images."""
def __init__(self):
super(Bootstarp, self).__init__()
self.dtype = cfg.MODEL.DATA_TYPE.lower()
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'ImageData',
'arguments': {
'dtype': self.dtype,
'data_format': 'NCHW',
'mean_values': [102.9801, 115.9465, 122.7717],
}
}
def forward(self, x):
inputs, outputs = [x], [self.register_output()]
return self.run(inputs, outputs)
class ProposalCXX(torch.nn.Module):
"""Extended operator to generate proposal regions."""
def __init__(self):
super(ProposalCXX, self).__init__()
self.register_op()
self.K = (cfg.FPN.ROI_MAX_LEVEL -
cfg.FPN.ROI_MIN_LEVEL + 1) \
if len(cfg.RPN.STRIDES) > 1 else 1
def register_op(self):
self.op_meta = {
'op_type': 'Proposal',
'arguments': {
'strides': cfg.RPN.STRIDES,
'ratios': [float(e) for e in cfg.RPN.ASPECT_RATIOS],
'scales': [float(e) for e in cfg.RPN.SCALES],
'pre_nms_top_n': cfg.TEST.RPN_PRE_NMS_TOP_N,
'post_nms_top_n': cfg.TEST.RPN_POST_NMS_TOP_N,
'nms_thresh': cfg.TEST.RPN_NMS_THRESH,
'min_size': cfg.TEST.RPN_MIN_SIZE,
                'min_level': cfg.FPN.ROI_MIN_LEVEL,
'max_level': cfg.FPN.ROI_MAX_LEVEL,
'canonical_scale': cfg.FPN.ROI_CANONICAL_SCALE,
'canonical_level': cfg.FPN.ROI_CANONICAL_LEVEL,
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)]
outputs = [self.register_output() for _ in range(self.K)]
outputs = self.run(inputs, outputs)
return outputs if isinstance(outputs, list) else [outputs]
def conv1x1(dim_in, dim_out, stride=1, bias=False):
"""1x1 convolution."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
def conv3x3(dim_in, dim_out, stride=1, bias=False):
"""3x3 convolution with padding."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=3,
stride=stride,
padding=1,
bias=bias,
)
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import importlib
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.utils.logger import is_root
from lib.modeling.factory import get_body_func
from lib.modeling import (
Bootstarp, FPN, RPN,
FastRCNN,
RetinaNet, SSD,
)
class Detector(torch.nn.Module):
"""The "Detector" organizes the detection pipelines.
    A number of classic algorithms are integrated; see
    ``lib.core.config`` for their hyper-parameters.
"""
def __init__(self):
super(Detector, self).__init__()
model = cfg.MODEL.TYPE
backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:]
# + Data Loader
self.data_layer = importlib.import_module(
'lib.{}'.format(model)).DataLayer
self.bootstarp = Bootstarp()
# + Feature Extractor
self.body = get_body_func(body)()
feature_dims = self.body.feature_dims
# + Feature Enhancer
if 'fpn' in modules:
self.fpn = FPN(feature_dims)
feature_dims = self.fpn.feature_dims
elif 'mbox' in modules:
pass # Placeholder
else:
feature_dims = [feature_dims[-1]]
# + Detection Modules
if 'rcnn' in model:
self.rpn = RPN(feature_dims[0])
self.fast_rcnn = FastRCNN(feature_dims[0])
if 'retinanet' in model:
self.retinanet = RetinaNet(feature_dims[0])
if 'ssd' in model:
self.ssd = SSD(feature_dims)
def load_weights(self, weights):
"""Load the state dict of this detector.
Note that the mismatched keys will be ignored.
Parameters
----------
weights : str
The path of the weights file.
"""
self.load_state_dict(torch.load(weights),
strict=False, verbose=is_root())
def forward(self, inputs=None):
"""Compute the detection outputs.
Parameters
----------
inputs : dict or None
The inputs.
"""
# 0. Get the inputs
if inputs is None:
# 1) Training: <= DataLayer
# 2) Inference: <= Given
if not hasattr(self, 'data_loader'):
self.data_loader = self.data_layer()
inputs = self.data_loader()
# 1. Extract features
# Process the data:
# 1) NHWC => NCHW
# 2) Uint8 => Float32 or Float16
# 3) Mean subtraction
processed_data = self.bootstarp(inputs['data'])
features = self.body(processed_data)
# 2. Apply the FPN to enhance features if necessary
if hasattr(self, 'fpn'):
features = self.fpn(features)
# 3. Collect detection outputs
outputs = OrderedDict()
# 3.1 Feature -> RPN -> Fast R-CNN
if hasattr(self, 'rpn'):
outputs.update(
self.rpn(
features=features,
**inputs,
)
)
outputs.update(
self.fast_rcnn(
features=features,
rpn_cls_score=outputs['rpn_cls_score'],
rpn_bbox_pred=outputs['rpn_bbox_pred'],
**inputs,
)
)
# 3.2 Feature -> RetinaNet
if hasattr(self, 'retinanet'):
outputs.update(
self.retinanet(
features=features,
**inputs,
)
)
# 3.3 Feature -> SSD
if hasattr(self, 'ssd'):
outputs.update(
self.ssd(
features=features,
**inputs,
)
)
return outputs
def optimize_for_inference(self):
"""Optimize the graph for the inference.
It usually involves the removing of BN or Affine.
"""
##################################
# Merge Affine into Convolution #
##################################
last_module = None
for e in self.modules():
if isinstance(e, torch.nn.Affine) and \
isinstance(last_module, torch.nn.Conv2d):
if last_module.bias is None:
delattr(last_module, 'bias')
e.forward = lambda x: x
last_module.bias = e.bias
last_module.weight.data.mul_(e.weight.data)
last_module = e
######################################
# Merge BatchNorm into Convolution #
######################################
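        # BatchNorm computes y = gamma * (x - mean) / sqrt(var + eps) + beta.
        # With term = gamma / sqrt(var + eps), this is y = term * x +
        # (beta - term * mean), so the convolution can absorb it as
        # W' = W * term and b' = beta - term * mean, as assigned below.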
last_module = None
for e in self.modules():
if isinstance(e, torch.nn.BatchNorm2d) and \
isinstance(last_module, torch.nn.Conv2d):
if last_module.bias is None:
delattr(last_module, 'bias')
e.forward = lambda x: x
term = torch.sqrt(e.running_var.data + e.eps)
term = e.weight.data / term
last_module.bias = e.bias.data - term * e.running_mean.data
last_module.weight.data.mul_(term)
last_module = e
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import importlib
from collections import defaultdict
_STORE = defaultdict(dict)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
except ImportError as e:
        raise ValueError('Cannot import module from: ' + module_name)
###########################################
# #
# Body #
# #
###########################################
# ResNet
for D in [18, 34, 50, 101, 152, 200, 269]:
_STORE['BODY']['resnet{}'.format(D)] = \
'lib.modeling.resnet.make_resnet_{}'.format(D)
# VGG
for D in [16, 19]:
for T in ['', '_reduced_300', '_reduced_512']:
_STORE['BODY']['vgg{}{}'.format(D, T)] = \
'lib.modeling.vgg.make_vgg_{}{}'.format(D, T)
# AirNet
for D in ['3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D)
def get_body_func(name):
return get_template_func(
name, _STORE['BODY'], 'Body')
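# Usage sketch: keys are lower-cased registry names, e.g.
#
#   >>> get_body_func('resnet50')   # resolves to lib.modeling.resnet.make_resnet_50
#   >>> body = get_body_func('airnet4b')()  # builds an AirNet backbone
#
# Unknown keys raise a ValueError listing the registered bodies.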
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling.base import ProposalCXX
class FastRCNN(torch.nn.Module):
"""Generate proposal regions for R-CNN series.
The pipeline is as follows:
... -> RoIs \ /-> cls_score -> cls_loss
-> RoIFeatureXform -> MLP
... -> Features / \-> bbox_pred -> bbox_loss
"""
def __init__(self, dim_in=256):
super(FastRCNN, self).__init__()
if len(cfg.RPN.STRIDES) > 1:
            # RPN with multiple strides (i.e., FPN)
from lib.fpn import ProposalLayer, ProposalTargetLayer
else:
            # RPN with single stride (i.e., C4)
from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.proposal_cxx = ProposalCXX()
self.proposal_layer = ProposalLayer()
self.proposal_target_layer = ProposalTargetLayer()
self.softmax = torch.nn.Softmax(dim=1)
self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = {
'RoIPool': torch.roi_pool,
'RoIAlign': torch.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1.)
# Compute spatial scales for multiple strides
roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels]
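        # E.g. ROI_MIN_LEVEL=2 and ROI_MAX_LEVEL=5 give
        # spatial_scales = [1/4, 1/8, 1/16, 1/32] for P2..P5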
self.reset_parameters()
def reset_parameters(self):
# Careful initialization for Fast R-CNN
torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters():
if 'bias' in name: torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func(
feature, rois,
pooled_h=cfg.FRCNN.ROI_XFORM_RESOLUTION,
pooled_w=cfg.FRCNN.ROI_XFORM_RESOLUTION,
spatial_scale=spatial_scale,
)
def forward(self, **kwargs):
# Generate Proposals
# Apply the CXX implementation during inference
proposal_func = self.proposal_layer \
if self.training else self.proposal_cxx
self.rcnn_data = {
'rois': proposal_func(
kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data),
kwargs['rpn_bbox_pred'],
kwargs['ims_info'],
)
}
# Generate Targets from Proposals
if self.training:
self.rcnn_data.update(
self.proposal_target_layer(
rpn_rois=self.rcnn_data['rois'],
gt_boxes=kwargs['gt_boxes'],
)
)
# Transform RoI Feature
roi_features = []
if len(self.rcnn_data['rois']) > 1:
for i, spatial_scale in enumerate(self.spatial_scales):
roi_features.append(
self.RoIFeatureTransform(
kwargs['features'][i],
self.rcnn_data['rois'][i],
spatial_scale,
)
)
roi_features = torch.cat(roi_features, dim=0)
else:
spatial_scale = 1.0 / cfg.RPN.STRIDES[0]
roi_features = \
self.RoIFeatureTransform(
kwargs['features'][0],
self.rcnn_data['rois'][0],
spatial_scale,
)
# Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim)
rcnn_output = self.relu(self.fc6(roi_features))
rcnn_output = self.relu(self.fc7(rcnn_output))
# Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float()
outputs = OrderedDict({
'bbox_pred':
self.bbox_pred(rcnn_output).float(),
})
if self.training:
# Compute rcnn losses
outputs.update(OrderedDict({
'cls_loss': self.cls_loss(
cls_score,
self.rcnn_data['labels'],
),
'bbox_loss': self.bbox_loss(
outputs['bbox_pred'],
self.rcnn_data['bbox_targets'],
self.rcnn_data['bbox_inside_weights'],
self.rcnn_data['bbox_outside_weights'],
),
}))
else:
            # Return the rois to decode the refined boxes
if len(self.rcnn_data['rois']) > 1:
outputs['rois'] = torch.cat(
self.rcnn_data['rois'], dim=0)
else:
outputs['rois'] = self.rcnn_data['rois'][0]
# Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score)
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.base import conv1x1, conv3x3
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
class FPN(torch.nn.Module):
"""Feature Pyramid Networks for R-CNN and RetinaNet."""
def __init__(self, feature_dims):
super(FPN, self).__init__()
self.C = torch.nn.ModuleList()
self.P = torch.nn.ModuleList()
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.C.append(conv1x1(feature_dims[lvl - 1], 256, bias=True))
self.P.append(conv3x3(256, 256, bias=True))
if 'retinanet' in cfg.MODEL.TYPE:
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else 256
self.P.append(conv3x3(dim_in, 256, stride=2, bias=True))
self.relu = torch.nn.ReLU(inplace=False)
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
self.reset_parameters()
self.feature_dims = [256]
def reset_parameters(self):
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
                    # a=1 fixes the gain at 1 (for the [-127, 127] range),
                    # i.e. a Xavier-style uniform initialization
                    a=1,
                )
torch.nn.init.constant_(m.bias, 0)
def apply_with_rcnn(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
        outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Apply MaxPool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1]))
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
def apply_with_retinanet(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
        outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Add extra convolutions for higher features
extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl: extra_input = self.relu(outputs[-1])
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
def forward(self, features):
if 'rcnn' in cfg.MODEL.TYPE:
return self.apply_with_rcnn(features)
elif 'retinanet' in cfg.MODEL.TYPE:
return self.apply_with_retinanet(features)
else:
raise NotImplementedError()
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.base import conv1x1, conv3x3, affine
class BasicBlock(torch.nn.Module):
expansion = 1
def __init__(self, dim_in, dim_out, stride=1,
downsample=None, dropblock=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out)
self.downsample = downsample
self.dropblock = dropblock
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
out = self.conv2(out)
out = self.bn2(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.downsample is not None:
residual = self.downsample(residual)
out += residual
out = self.relu(out)
return out
class Bottleneck(torch.nn.Module):
# 1x64d => 0.25 (ResNet)
# 32x8d, 64x4d => 1.0 (ResNeXt)
contraction = cfg.RESNET.NUM_GROUPS \
* cfg.RESNET.GROUP_WIDTH / 256.0
def __init__(self, dim_in, dim_out, stride=1,
downsample=None, dropblock=None):
super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction)
self.conv1 = conv1x1(dim_in, dim)
self.bn1 = affine(dim)
self.conv2 = conv3x3(dim, dim, stride=stride)
self.bn2 = affine(dim)
self.conv3 = conv1x1(dim, dim_out)
self.bn3 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.downsample = downsample
self.dropblock = dropblock
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
out = self.conv3(out)
out = self.bn3(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.downsample is not None:
residual = self.downsample(residual)
out += residual
out = self.relu(out)
return out
class ResNet(torch.nn.Module):
def __init__(self, block, layers):
super(ResNet, self).__init__()
self.dim_in, filters = 64, [256, 512, 1024, 2048]
self.feature_dims = [self.dim_in] + filters
self.conv1 = torch.nn.Conv2d(
3, 64,
kernel_size=7,
stride=2,
padding=3,
bias=False,
)
self.bn1 = affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
kernel_size=3,
stride=2,
padding=0,
ceil_mode=True,
)
self.drop3 = torch.nn.DropBlock2d(
7, 0.9, alpha=0.25, decrement=cfg.DROPBLOCK.DECREMENT) \
if cfg.DROPBLOCK.DROP_ON else None
self.drop4 = torch.nn.DropBlock2d(
7, 0.9, alpha=1., decrement=cfg.DROPBLOCK.DECREMENT) \
if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4)
self.reset_parameters()
def reset_parameters(self):
        # Kaiming initialization
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_(
m.weight,
nonlinearity='relu')
# Stop the gradients if necessary
def freeze_func(m):
if isinstance(m, torch.nn.Conv2d):
m.weight.requires_grad = False
m._buffers['weight'] = m.weight
del m._parameters['weight']
if cfg.MODEL.FREEZE_AT > 0:
self.conv1.apply(freeze_func)
for i in range(cfg.MODEL.FREEZE_AT, 1, -1):
getattr(self, 'layer{}'.format(i - 1)).apply(freeze_func)
def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None):
downsample = None
if stride != 1 or self.dim_in != dim_out:
downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out),
)
layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)]
self.dim_in = dim_out
for i in range(1, blocks):
layers.append(block(dim_out, dim_out, dropblock=dropblock))
return torch.nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
outputs = [x]
outputs += [self.layer1(outputs[-1])]
outputs += [self.layer2(outputs[-1])]
outputs += [self.layer3(outputs[-1])]
outputs += [self.layer4(outputs[-1])]
return outputs
def resnet(depth):
if depth == 18: units = [2, 2, 2, 2]
elif depth == 34: units = [3, 4, 6, 3]
elif depth == 50: units = [3, 4, 6, 3]
elif depth == 101: units = [3, 4, 23, 3]
elif depth == 152: units = [3, 8, 36, 3]
elif depth == 200: units = [3, 24, 36, 3]
elif depth == 269: units = [3, 30, 48, 8]
else: raise ValueError('Unsupported depth: %d' % depth)
block = Bottleneck if depth >= 50 else BasicBlock
return ResNet(block, units)
def make_resnet_50(): return resnet(50)
def make_resnet_101(): return resnet(101)
def make_resnet_152(): return resnet(152)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling.base import conv3x3
from lib.retinanet import AnchorTargetLayer, ProposalLayer
class RetinaNet(torch.nn.Module):
def __init__(self, dim_in=256):
super(RetinaNet, self).__init__()
########################################
# RetinaNet outputs #
########################################
self.cls_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS))
self.bbox_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS))
# Packed as [C, A] not [A, C]
self.C = cfg.MODEL.NUM_CLASSES \
if cfg.RETINANET.SOFTMAX \
else cfg.MODEL.NUM_CLASSES - 1
A = len(cfg.RETINANET.ASPECT_RATIOS) * \
cfg.RETINANET.SCALES_PER_OCTAVE
self.cls_score = conv3x3(dim_in, self.C * A, bias=True)
self.bbox_pred = conv3x3(dim_in, 4 * A, bias=True)
self.cls_prob = torch.nn.Softmax(dim=1, inplace=True) \
if cfg.RETINANET.SOFTMAX else torch.nn.Sigmoid(inplace=True)
self.relu = torch.nn.ReLU(inplace=True)
self.proposal_layer = ProposalLayer()
########################################
# RetinaNet losses #
########################################
self.anchor_target_layer = AnchorTargetLayer()
if cfg.RETINANET.SOFTMAX:
self.cls_loss = torch.nn.SoftmaxFocalLoss(
ignore_index=-1,
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA)
else:
self.cls_loss = torch.nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1. / 9.)
self.reset_parameters()
def reset_parameters(self):
# Initialization following the RPN
# Weight ~ Normal(0, 0.01)
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.01)
torch.nn.init.constant_(m.bias, 0)
# Bias prior initialization for Focal Loss
        # For details, see the official code:
# https://github.com/facebookresearch/Detectron
if cfg.RETINANET.SOFTMAX:
bias = self.cls_score.bias.numpy()
bias = bias.reshape((cfg.MODEL.NUM_CLASSES, -1))
bias[0, :] = math.log(
(cfg.MODEL.NUM_CLASSES - 1) *
(1 - cfg.PRIOR_PROB) / cfg.PRIOR_PROB)
else:
self.cls_score.bias.fill_(
-math.log((1 - cfg.PRIOR_PROB) / cfg.PRIOR_PROB))
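        # Filling the bias with -log((1 - p) / p) makes the initial
        # sigmoid output equal to the prior p (e.g. 0.01), so nearly
        # every anchor starts as background and the focal loss is not
        # swamped by easy negatives in the first iterations.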
def compute_outputs(self, features):
"""Compute the RetinaNet logits.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
"""
# Compute logits
cls_score_wide, bbox_pred_wide = [], []
for j, feature in enumerate(features):
cls_x, bbox_x = feature, feature
for i in range(cfg.RETINANET.NUM_CONVS):
cls_x = self.relu(self.cls_conv[i](cls_x))
bbox_x = self.relu(self.bbox_conv[i](bbox_x))
            cls_score_wide.append(self.cls_score(cls_x).view(0, self.C, -1))
bbox_pred_wide.append(self.bbox_pred(bbox_x).view(0, 4, -1))
if len(features) > 1:
# Concat them if necessary
return torch.cat(cls_score_wide, dim=2), \
torch.cat(bbox_pred_wide, dim=2)
else:
return cls_score_wide[0], bbox_pred_wide[0]
def compute_losses(
self, features,
cls_score, bbox_pred,
gt_boxes, ims_info,
):
"""Compute the RetinaNet classification loss and regression loss.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
cls_score : dragon.vm.torch.Tensor
The classification logits.
bbox_pred : dragon.vm.torch.Tensor
The bbox regression logits.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
self.retinanet_data = \
self.anchor_target_layer(
features=features,
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return OrderedDict({
'cls_loss':
self.cls_loss(
cls_score,
self.retinanet_data['labels'],
),
'bbox_loss':
self.bbox_loss(
bbox_pred,
self.retinanet_data['bbox_targets'],
self.retinanet_data['bbox_inside_weights'],
self.retinanet_data['bbox_outside_weights'],
)
})
def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({'bbox_pred': bbox_pred})
if self.training:
outputs.update(
self.compute_losses(
kwargs['features'],
cls_score,
bbox_pred,
kwargs['gt_boxes'],
kwargs['ims_info'],
)
)
else:
outputs['detections'] = \
self.proposal_layer(
kwargs['features'],
self.cls_prob(cls_score),
bbox_pred,
kwargs['ims_info'],
)
return outputs
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling.base import conv1x1, conv3x3
class RPN(torch.nn.Module):
"""Region Proposal Networks for R-CNN series."""
def __init__(self, dim_in=256):
super(RPN, self).__init__()
##################################
# RPN outputs #
##################################
num_anchors = len(cfg.RPN.ASPECT_RATIOS) * (
len(cfg.RPN.SCALES) if len(cfg.RPN.STRIDES) == 1 else 1)
self.output = conv3x3(dim_in, dim_in, bias=True)
self.cls_score = conv1x1(dim_in, num_anchors, bias=True)
self.bbox_pred = conv1x1(dim_in, num_anchors * 4, bias=True)
self.relu = torch.nn.ReLU(inplace=True)
##################################
# RPN losses #
##################################
if len(cfg.RPN.STRIDES) > 1:
            # RPN with multiple strides (i.e., FPN)
from lib.fpn.layers.anchor_target_layer import AnchorTargetLayer
else:
            # RPN with single stride (i.e., C4)
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer
self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.BCEWithLogitsLoss()
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1. / 9.)
self.reset_parameters()
def reset_parameters(self):
# Initialization for the RPN
# Weight ~ Normal(0, 0.01)
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.01)
torch.nn.init.constant_(m.bias, 0)
if cfg.MODEL.DATA_TYPE.lower() == 'float16':
            # Zero the weights of the output layers for FP16,
            # so that training starts out numerically stable
self.cls_score.weight.zero_()
self.bbox_pred.weight.zero_()
def compute_outputs(self, features):
"""Compute the RPN logits.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
"""
# Compute rpn logits
cls_score_wide, bbox_pred_wide = [], []
for feature in features:
x = self.relu(self.output(feature))
if len(features) > 1:
cls_score = self.cls_score(x).view(0, -1)
bbox_pred = self.bbox_pred(x).view(0, 4, -1)
else:
cls_score = self.cls_score(x)
bbox_pred = self.bbox_pred(x)
cls_score_wide.append(cls_score)
bbox_pred_wide.append(bbox_pred)
if len(features) > 1:
# Concat them if necessary
return torch.cat(cls_score_wide, dim=1), \
torch.cat(bbox_pred_wide, dim=2)
else:
return cls_score_wide[0], bbox_pred_wide[0]
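    # Shape note for compute_outputs: a 0 passed to view() keeps that
    # axis' extent (a Caffe2-style reshape convention), so view(0, -1)
    # flattens everything except the batch dimension.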
def compute_losses(
self, features,
cls_score, bbox_pred,
gt_boxes, ims_info,
):
"""Compute the RPN classification loss and regression loss.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
cls_score : dragon.vm.torch.Tensor
The (binary) classification logits.
bbox_pred : dragon.vm.torch.Tensor
The bbox regression logits.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
self.rpn_data = \
self.anchor_target_layer(
features=features,
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return OrderedDict({
'rpn_cls_loss':
self.cls_loss(cls_score, self.rpn_data['labels']),
'rpn_bbox_loss':
self.bbox_loss(
bbox_pred,
self.rpn_data['bbox_targets'],
self.rpn_data['bbox_inside_weights'],
self.rpn_data['bbox_outside_weights'],
)
})
def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({
'rpn_cls_score': cls_score,
'rpn_bbox_pred': bbox_pred,
})
if self.training:
outputs.update(
self.compute_losses(
kwargs['features'],
cls_score,
bbox_pred,
kwargs['gt_boxes'],
kwargs['ims_info'],
)
)
return outputs
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling.base import conv3x3
from lib.ssd import (
PriorBoxLayer, MultiBoxMatchLayer,
HardMiningLayer, MultiBoxTargetLayer,
)
class SSD(torch.nn.Module):
def __init__(self, feature_dims):
super(SSD, self).__init__()
########################################
# SSD outputs #
########################################
self.cls_score = torch.nn.ModuleList()
self.bbox_pred = torch.nn.ModuleList()
self.softmax = torch.nn.Softmax(dim=2)
C = cfg.MODEL.NUM_CLASSES
for i, dim_in in enumerate(feature_dims):
A = len(cfg.SSD.MULTIBOX.ASPECT_RATIOS[i]) + 1
self.cls_score.append(conv3x3(dim_in, A * C, bias=True))
self.bbox_pred.append(conv3x3(dim_in, A * 4, bias=True))
self.prior_box_layer = PriorBoxLayer()
########################################
# SSD losses #
########################################
self.box_match_layer = MultiBoxMatchLayer()
self.hard_mining_layer = HardMiningLayer()
self.box_target_layer = MultiBoxTargetLayer()
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss()
self.reset_parameters()
def reset_parameters(self):
# Careful Initialization
# Weight ~ Normal(0, 0.001)
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.001)
torch.nn.init.constant_(m.bias, 0)
def compute_outputs(self, features):
"""Compute the SSD logits.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
"""
# Compute logits
cls_score_wide, bbox_pred_wide = [], []
for i, feature in enumerate(features):
cls_score_wide.append(
self.cls_score[i](feature)
.permute((0, 2, 3, 1)).view(0, -1))
bbox_pred_wide.append(
self.bbox_pred[i](feature)
.permute((0, 2, 3, 1)).view(0, -1))
# Concat them if necessary
return torch.cat(cls_score_wide, dim=1).view(
0, -1, cfg.MODEL.NUM_CLASSES), \
torch.cat(bbox_pred_wide, dim=1).view(0, -1, 4)
def compute_losses(
self, prior_boxes, gt_boxes,
cls_score, bbox_pred, cls_prob,
):
"""Compute the SSD classification loss and regression loss.
Parameters
----------
prior_boxes : numpy.ndarray
The prior boxes(anchors).
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
cls_score : dragon.vm.torch.Tensor
The classification logits.
bbox_pred : dragon.vm.torch.Tensor
The bbox regression logits.
cls_prob : dragon.vm.torch.Tensor
The logits after a softmax function.
"""
# Collect the SSD training data
        # See the paper (Liu et al., 2016) for details
self.ssd_data = \
self.box_match_layer(
prior_boxes=prior_boxes,
gt_boxes=gt_boxes,
)
self.ssd_data.update(
self.hard_mining_layer(
conf_prob=cls_prob,
match_labels=self.ssd_data['match_labels'],
max_overlaps=self.ssd_data['max_overlaps'],
)
)
self.ssd_data.update(
self.box_target_layer(
match_inds=self.ssd_data['match_inds'],
match_labels=self.ssd_data['match_labels'],
prior_boxes=prior_boxes,
gt_boxes=gt_boxes,
)
)
return OrderedDict({
            # A compensating factor of 4.0 is used,
            # as we normalize over both the positive and negative samples
'cls_loss':
self.cls_loss(
cls_score.view(-1, cfg.MODEL.NUM_CLASSES),
self.ssd_data['labels']
) * 4.,
'bbox_loss':
self.bbox_loss(
bbox_pred,
self.ssd_data['bbox_targets'],
self.ssd_data['bbox_inside_weights'],
self.ssd_data['bbox_outside_weights'],
)
})
def forward(self, *args, **kwargs):
prior_boxes = self.prior_box_layer(kwargs['features'])
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({
'prior_boxes': prior_boxes,
'bbox_pred': bbox_pred,
})
if self.training:
outputs.update(
self.compute_losses(
prior_boxes,
kwargs['gt_boxes'],
cls_score,
bbox_pred,
self.softmax(cls_score.data),
)
)
else:
outputs['cls_prob'] = \
self.softmax(cls_score)
return outputs
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.base import conv1x1, conv3x3
class VGG(torch.nn.Module):
def __init__(self, arch, extra_arch=None, reduced=False):
super(VGG, self).__init__()
self.reduced = reduced
self.units, filter_list = arch
self.feature_dims = filter_list[:]
self.maxpool = torch.nn.MaxPool2d(
kernel_size=2, stride=2, ceil_mode=True)
self.s1pool = torch.nn.MaxPool2d(
kernel_size=3, stride=1, padding=1, ceil_mode=True)
self.relu = torch.nn.ReLU(inplace=True)
for i in range(len(self.units)):
conv_name = 'conv{}'.format(i + 1)
dim_in = 3 if i == 0 else filter_list[i - 1]
for j in range(self.units[i]):
self.__setattr__(
'{}_{}'.format(conv_name, j + 1),
conv3x3(dim_in, filter_list[i], bias=True))
if j == 0: dim_in = filter_list[i]
if reduced:
            # Empirically, the full L2Norm is redundant;
            # we keep only a trainable scale
self.conv4_3_norm = torch.nn.Affine(filter_list[3], bias=False)
self.conv4_3_norm.weight.zero_() # Zero-Init
self.fc6 = torch.nn.Conv2d(filter_list[-1], 1024,
kernel_size=3, stride=1, padding=6, dilation=6)
self.fc7 = conv1x1(1024, 1024, bias=True)
self.feature_dims = [filter_list[-2], 1024]
if extra_arch is not None:
strides, filter_list, kps = extra_arch
self.extra_units = [2] * len(strides)
self.feature_dims += [n * 2 for n in filter_list]
for i in range(len(strides)):
conv_name = 'conv{}'.format(i + 6)
dim_in = 1024 if i == 0 else filter_list[i - 1] * 2
self.__setattr__('{}_1'.format(conv_name),
conv1x1(dim_in, filter_list[i], bias=True))
if strides[i] == 2:
self.__setattr__('{}_2'.format(conv_name),
conv3x3(filter_list[i], filter_list[i] * 2, 2, bias=True))
else:
self.__setattr__('{}_2'.format(conv_name),
torch.nn.Conv2d(filter_list[i], filter_list[i] * 2,
kernel_size=kps[0], padding=kps[1], stride=kps[2]))
self.reset_parameters()
def reset_parameters(self):
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
                    # a=1 fixes the gain at 1 (for the [-127, 127] range),
                    # i.e. a Xavier-style uniform initialization
                    a=1,
                )
torch.nn.init.constant_(m.bias, 0)
# Stop the gradients if necessary
def freeze_func(m):
if isinstance(m, torch.nn.Conv2d):
m.weight.requires_grad = False
m._buffers['weight'] = m.weight
del m._parameters['weight']
m.bias.requires_grad = False
m._buffers['bias'] = m.bias
del m._parameters['bias']
for i in range(cfg.MODEL.FREEZE_AT, 0, -1):
conv_name = 'conv{}'.format(i)
for j in range(self.units[i - 1]):
self.__getattr__('{}_{}'.format(
conv_name, j + 1)).apply(freeze_func)
def forward(self, x):
outputs = []
# Conv1.x ~ Conv5.x
for i in range(len(self.units)):
conv_name = 'conv{}'.format(i + 1)
for j in range(self.units[i]):
x = self.relu(self.__getattr__(
'{}_{}'.format(conv_name, j + 1))(x))
if self.reduced and i == 3:
outputs.append(self.conv4_3_norm(x))
if i < 4: x = self.maxpool(x)
else: x = self.s1pool(x) if self.reduced else x
# Internal FC layers and Extra Conv Layers
if self.reduced:
x = self.relu(self.fc6(x))
x = self.relu(self.fc7(x))
outputs.append(x)
for i in range(len(self.extra_units)):
conv_name = 'conv{}'.format(i + 6)
for j in range(self.extra_units[i]):
x = self.relu(self.__getattr__(
'{}_{}'.format(conv_name, j + 1))(x))
outputs.append(x)
else:
outputs.append(x)
return outputs
def make_vgg_16():
return VGG(([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]))
def make_vgg_16_reduced(scale=300):
if scale == 300:
extra_arch = (
[2, 2, 1, 1],
[256, 128, 128, 128],
[3, 0, 1],
)
elif scale == 512:
extra_arch = (
[2, 2, 2, 2, 1],
[256, 128, 128, 128, 128],
[4, 1, 1],
)
else:
raise ValueError('Unsupported scale: {}'.format(scale))
return VGG(([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]),
extra_arch=extra_arch, reduced=True)
def make_vgg_16_reduced_300(): return make_vgg_16_reduced(300)
def make_vgg_16_reduced_512(): return make_vgg_16_reduced(512)
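# Usage sketch: make_vgg_16_reduced_300() yields the classic SSD300
# backbone; its feature_dims resolve to [512, 1024, 512, 256, 256, 256],
# i.e. the scaled conv4_3, fc7 and the four extra conv stages.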
\ No newline at end of file
File mode changed
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/nms_wrapper.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.core.config import cfg
import lib.utils.logger as logger
try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms
except ImportError as e:
print('Failed to import cpu nms. Error: {0}'.format(str(e)))
try:
from lib.nms.gpu_nms import gpu_nms
except ImportError as e:
print('Failed to import gpu nms. Error: {0}'.format(str(e)))
try:
from lib.utils.rboxes import RNMSWrapper
except ImportError as e:
print('Failed to import rnms. Error: {0}'.format(str(e)))
def nms(detections, thresh, force_cpu=False):
"""Perform either CPU or GPU Hard-NMS."""
if detections.shape[0] == 0: return []
if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else: return cpu_nms(detections, thresh)
def soft_nms(
detections,
thresh,
method='linear',
sigma=0.5,
score_thresh=0.001,
):
"""Perform CPU Soft-NMS."""
if detections.shape[0] == 0: return []
methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods:
logger.fatal('Unknown soft nms method: {}'.format(method))
return cpu_soft_nms(
detections,
thresh,
methods[method],
sigma,
score_thresh,
)
def rnms(detections, thresh):
"""Perform CPU Hard-NMS on rotated boxes.
Parameters
----------
    detections : numpy.ndarray
        The (N, 6) double array of [cx, cy, w, h, angle, score].
    thresh : float
        The NMS threshold.
"""
if detections.shape[0] == 0: return []
wrapper = RNMSWrapper()
return wrapper.nms(detections, thresh)
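# Usage sketch, assuming (N, 5) detections of [x1, y1, x2, y2, score]:
#
#   >>> import numpy as np
#   >>> dets = np.array([[0, 0, 10, 10, 0.9],
#   ...                  [1, 1, 10, 10, 0.8],   # overlaps the first heavily
#   ...                  [20, 20, 30, 30, 0.7]], dtype=np.float32)
#   >>> nms(dets, thresh=0.5, force_cpu=True)  # -> keeps indices [0, 2]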
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
syntax = "proto2";
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
optional int32 width = 3;
optional bytes data = 4;
optional int32 label = 5;
repeated float float_data = 6;
optional bool encoded = 7 [default = false];
repeated int32 labels = 8;
}
message Annotation {
optional float x1 = 1;
optional float y1 = 2;
optional float x2 = 3;
optional float y2 = 4;
optional string name = 5;
optional bool difficult = 6 [default = false];
optional string mask = 7;
}
message AnnotatedDatum {
optional Datum datum = 1;
optional string filename = 2;
repeated Annotation annotation = 3;
}
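// Usage sketch with the generated Python bindings below (illustrative values):
//
//   anno_datum = AnnotatedDatum(filename='000001.jpg')
//   anno_datum.datum.channels = 3
//   box = anno_datum.annotation.add()
//   box.x1, box.y1, box.x2, box.y2 = 48., 240., 195., 371.
//   box.name, box.difficult = 'dog', False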
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: anno.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='anno.proto',
package='',
serialized_pb=_b('\n\nanno.proto\"\x91\x01\n\x05\x44\x61tum\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\r\n\x05width\x18\x03 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\x0c\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x12\n\nfloat_data\x18\x06 \x03(\x02\x12\x16\n\x07\x65ncoded\x18\x07 \x01(\x08:\x05\x66\x61lse\x12\x0e\n\x06labels\x18\x08 \x03(\x05\"r\n\nAnnotation\x12\n\n\x02x1\x18\x01 \x01(\x02\x12\n\n\x02y1\x18\x02 \x01(\x02\x12\n\n\x02x2\x18\x03 \x01(\x02\x12\n\n\x02y2\x18\x04 \x01(\x02\x12\x0c\n\x04name\x18\x05 \x01(\t\x12\x18\n\tdifficult\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x0c\n\x04mask\x18\x07 \x01(\t\"Z\n\x0e\x41nnotatedDatum\x12\x15\n\x05\x64\x61tum\x18\x01 \x01(\x0b\x32\x06.Datum\x12\x10\n\x08\x66ilename\x18\x02 \x01(\t\x12\x1f\n\nannotation\x18\x03 \x03(\x0b\x32\x0b.Annotation')
)
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
_DATUM = _descriptor.Descriptor(
name='Datum',
full_name='Datum',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='channels', full_name='Datum.channels', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='height', full_name='Datum.height', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='width', full_name='Datum.width', index=2,
number=3, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='data', full_name='Datum.data', index=3,
number=4, type=12, cpp_type=9, label=1,
has_default_value=False, default_value=_b(""),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='label', full_name='Datum.label', index=4,
number=5, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='float_data', full_name='Datum.float_data', index=5,
number=6, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='encoded', full_name='Datum.encoded', index=6,
number=7, type=8, cpp_type=7, label=1,
has_default_value=True, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='labels', full_name='Datum.labels', index=7,
number=8, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=15,
serialized_end=160,
)
_ANNOTATION = _descriptor.Descriptor(
name='Annotation',
full_name='Annotation',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='x1', full_name='Annotation.x1', index=0,
number=1, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='y1', full_name='Annotation.y1', index=1,
number=2, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='x2', full_name='Annotation.x2', index=2,
number=3, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='y2', full_name='Annotation.y2', index=3,
number=4, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='name', full_name='Annotation.name', index=4,
number=5, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='difficult', full_name='Annotation.difficult', index=5,
number=6, type=8, cpp_type=7, label=1,
has_default_value=True, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='mask', full_name='Annotation.mask', index=6,
number=7, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=162,
serialized_end=276,
)
_ANNOTATEDDATUM = _descriptor.Descriptor(
name='AnnotatedDatum',
full_name='AnnotatedDatum',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='datum', full_name='AnnotatedDatum.datum', index=0,
number=1, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='filename', full_name='AnnotatedDatum.filename', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='annotation', full_name='AnnotatedDatum.annotation', index=2,
number=3, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=278,
serialized_end=368,
)
_ANNOTATEDDATUM.fields_by_name['datum'].message_type = _DATUM
_ANNOTATEDDATUM.fields_by_name['annotation'].message_type = _ANNOTATION
DESCRIPTOR.message_types_by_name['Datum'] = _DATUM
DESCRIPTOR.message_types_by_name['Annotation'] = _ANNOTATION
DESCRIPTOR.message_types_by_name['AnnotatedDatum'] = _ANNOTATEDDATUM
Datum = _reflection.GeneratedProtocolMessageType('Datum', (_message.Message,), dict(
DESCRIPTOR = _DATUM,
__module__ = 'anno_pb2'
# @@protoc_insertion_point(class_scope:Datum)
))
_sym_db.RegisterMessage(Datum)
Annotation = _reflection.GeneratedProtocolMessageType('Annotation', (_message.Message,), dict(
DESCRIPTOR = _ANNOTATION,
__module__ = 'anno_pb2'
# @@protoc_insertion_point(class_scope:Annotation)
))
_sym_db.RegisterMessage(Annotation)
AnnotatedDatum = _reflection.GeneratedProtocolMessageType('AnnotatedDatum', (_message.Message,), dict(
DESCRIPTOR = _ANNOTATEDDATUM,
__module__ = 'anno_pb2'
# @@protoc_insertion_point(class_scope:AnnotatedDatum)
))
_sym_db.RegisterMessage(AnnotatedDatum)
# @@protoc_insertion_point(module_scope)
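The descriptors above generate the `Datum`, `Annotation`, and `AnnotatedDatum` message classes. As a minimal sketch of how such records could be built and round-tripped (module name taken from the `__module__` fields above; the field values are hypothetical):

```python
from anno_pb2 import AnnotatedDatum, Annotation, Datum

# build one hypothetical record: image metadata plus a single labeled box
datum = Datum(channels=3, height=375, width=500, encoded=True)
box = Annotation(x1=48.0, y1=240.0, x2=195.0, y2=371.0, name='dog', difficult=False)
record = AnnotatedDatum(datum=datum, filename='000005.jpg', annotation=[box])

# serialize/parse round trip, e.g. for storage in an LMDB-style record file
payload = record.SerializeToString()
parsed = AnnotatedDatum.FromString(payload)
assert parsed.annotation[0].name == 'dog'
```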
__author__ = 'tylin'
# distutils: language = c
# distutils: sources = ../common/maskApi.c
#**************************************************************************
# Microsoft COCO Toolbox. version 2.0
# Data, paper, and tutorials available at: http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
# Licensed under the Simplified BSD License [see coco/license.txt]
#**************************************************************************
__author__ = 'tsungyi'
import sys
PYTHON_VERSION = sys.version_info[0]
# import both Python-level and C-level symbols of Numpy
# the API uses Numpy to interface C and Python
import numpy as np
cimport numpy as np
from libc.stdlib cimport malloc, free
# initialize Numpy. must do.
np.import_array()
# import numpy C function
# we use PyArray_ENABLEFLAGS to make the Numpy ndarray responsible for memory management
cdef extern from "numpy/arrayobject.h":
void PyArray_ENABLEFLAGS(np.ndarray arr, int flags)
# Declare the prototype of the C functions in MaskApi.h
cdef extern from "maskApi.h":
ctypedef unsigned int uint
ctypedef unsigned long siz
ctypedef unsigned char byte
ctypedef double* BB
ctypedef struct RLE:
siz h,
siz w,
siz m,
uint* cnts,
void rlesInit( RLE **R, siz n )
void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n )
void rleDecode( const RLE *R, byte *mask, siz n )
void rleMerge( const RLE *R, RLE *M, siz n, int intersect )
void rleArea( const RLE *R, siz n, uint *a )
void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o )
void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o )
void rleToBbox( const RLE *R, BB bb, siz n )
void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n )
void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w )
char* rleToString( const RLE *R )
void rleFrString( RLE *R, char *s, siz h, siz w )
# python class to wrap RLE array in C
# the class handles the memory allocation and deallocation
cdef class RLEs:
cdef RLE *_R
cdef siz _n
def __cinit__(self, siz n =0):
rlesInit(&self._R, n)
self._n = n
# free the RLE array here
def __dealloc__(self):
if self._R is not NULL:
for i in range(self._n):
free(self._R[i].cnts)
free(self._R)
def __getattr__(self, key):
if key == 'n':
return self._n
raise AttributeError(key)
# python class to wrap Mask array in C
# the class handles the memory allocation and deallocation
cdef class Masks:
cdef byte *_mask
cdef siz _h
cdef siz _w
cdef siz _n
def __cinit__(self, h, w, n):
self._mask = <byte*> malloc(h*w*n* sizeof(byte))
self._h = h
self._w = w
self._n = n
# def __dealloc__(self):
# the memory management of _mask has been passed to np.ndarray
# it doesn't need to be freed here
    # called when passing into np.array(); returns an np.ndarray in column-major order
def __array__(self):
cdef np.npy_intp shape[1]
shape[0] = <np.npy_intp> self._h*self._w*self._n
# Create a 1D array, and reshape it to fortran/Matlab column-major array
ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F')
# The _mask allocated by Masks is now handled by ndarray
PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA)
return ndarray
# internal conversion from Python RLEs object to compressed RLE format
def _toString(RLEs Rs):
cdef siz n = Rs.n
cdef bytes py_string
cdef char* c_string
objs = []
for i in range(n):
c_string = rleToString( <RLE*> &Rs._R[i] )
py_string = c_string
objs.append({
'size': [Rs._R[i].h, Rs._R[i].w],
'counts': py_string
})
free(c_string)
return objs
# internal conversion from compressed RLE format to Python RLEs object
def _frString(rleObjs):
cdef siz n = len(rleObjs)
Rs = RLEs(n)
cdef bytes py_string
cdef char* c_string
for i, obj in enumerate(rleObjs):
if PYTHON_VERSION == 2:
py_string = str(obj['counts']).encode('utf8')
elif PYTHON_VERSION == 3:
py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts']
else:
raise Exception('Python version must be 2 or 3')
c_string = py_string
rleFrString( <RLE*> &Rs._R[i], <char*> c_string, obj['size'][0], obj['size'][1] )
return Rs
# encode mask to RLEs objects
# returns a list of compressed RLE dicts (built via the _toString helper)
def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask):
h, w, n = mask.shape[0], mask.shape[1], mask.shape[2]
cdef RLEs Rs = RLEs(n)
rleEncode(Rs._R,<byte*>mask.data,h,w,n)
objs = _toString(Rs)
return objs
# decode mask from compressed list of RLE string or RLEs object
def decode(rleObjs):
cdef RLEs Rs = _frString(rleObjs)
h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n
masks = Masks(h, w, n)
rleDecode(<RLE*>Rs._R, masks._mask, n);
return np.array(masks)
def merge(rleObjs, intersect=0):
cdef RLEs Rs = _frString(rleObjs)
cdef RLEs R = RLEs(1)
rleMerge(<RLE*>Rs._R, <RLE*> R._R, <siz> Rs._n, intersect)
obj = _toString(R)[0]
return obj
def area(rleObjs):
cdef RLEs Rs = _frString(rleObjs)
cdef uint* _a = <uint*> malloc(Rs._n* sizeof(uint))
rleArea(Rs._R, Rs._n, _a)
cdef np.npy_intp shape[1]
shape[0] = <np.npy_intp> Rs._n
a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a)
PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA)
return a
# iou computation. support function overload (RLEs-RLEs and bbox-bbox).
def iou( dt, gt, pyiscrowd ):
def _preproc(objs):
if len(objs) == 0:
return objs
if type(objs) == np.ndarray:
if len(objs.shape) == 1:
                objs = objs.reshape((1, objs.shape[0]))
# check if it's Nx4 bbox
if not len(objs.shape) == 2 or not objs.shape[1] == 4:
raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension')
objs = objs.astype(np.double)
elif type(objs) == list:
# check if list is in box format and convert it to np.ndarray
isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs]))
isrle = np.all(np.array([type(obj) == dict for obj in objs]))
if isbox:
objs = np.array(objs, dtype=np.double)
if len(objs.shape) == 1:
objs = objs.reshape((1,objs.shape[0]))
elif isrle:
objs = _frString(objs)
else:
raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])')
else:
raise Exception('unrecognized type. The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.')
return objs
def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou):
rleIou( <RLE*> dt._R, <RLE*> gt._R, m, n, <byte*> iscrowd.data, <double*> _iou.data )
def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou):
bbIou( <BB> dt.data, <BB> gt.data, m, n, <byte*> iscrowd.data, <double*>_iou.data )
def _len(obj):
cdef siz N = 0
if type(obj) == RLEs:
N = obj.n
elif len(obj)==0:
pass
elif type(obj) == np.ndarray:
N = obj.shape[0]
return N
# convert iscrowd to numpy array
cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8)
# simple type checking
cdef siz m, n
dt = _preproc(dt)
gt = _preproc(gt)
m = _len(dt)
n = _len(gt)
if m == 0 or n == 0:
return []
if not type(dt) == type(gt):
raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray')
# define local variables
cdef double* _iou = <double*> 0
cdef np.npy_intp shape[1]
# check type and assign iou function
if type(dt) == RLEs:
_iouFun = _rleIou
elif type(dt) == np.ndarray:
_iouFun = _bbIou
else:
raise Exception('input data type not allowed.')
_iou = <double*> malloc(m*n* sizeof(double))
shape[0] = <np.npy_intp> m*n
iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou)
PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA)
_iouFun(dt, gt, iscrowd, m, n, iou)
return iou.reshape((m,n), order='F')
def toBbox( rleObjs ):
cdef RLEs Rs = _frString(rleObjs)
cdef siz n = Rs.n
cdef BB _bb = <BB> malloc(4*n* sizeof(double))
rleToBbox( <const RLE*> Rs._R, _bb, n )
cdef np.npy_intp shape[1]
shape[0] = <np.npy_intp> 4*n
bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4))
PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA)
return bb
def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ):
cdef siz n = bb.shape[0]
Rs = RLEs(n)
rleFrBbox( <RLE*> Rs._R, <const BB> bb.data, h, w, n )
objs = _toString(Rs)
return objs
def frPoly( poly, siz h, siz w ):
cdef np.ndarray[np.double_t, ndim=1] np_poly
n = len(poly)
Rs = RLEs(n)
for i, p in enumerate(poly):
np_poly = np.array(p, dtype=np.double, order='F')
rleFrPoly( <RLE*>&Rs._R[i], <const double*> np_poly.data, int(len(p)/2), h, w )
objs = _toString(Rs)
return objs
def frUncompressedRLE(ucRles, siz h, siz w):
cdef np.ndarray[np.uint32_t, ndim=1] cnts
cdef RLE R
cdef uint *data
n = len(ucRles)
objs = []
for i in range(n):
Rs = RLEs(1)
cnts = np.array(ucRles[i]['counts'], dtype=np.uint32)
        # the malloc could be hoisted out of the loop, but it's fine here
data = <uint*> malloc(len(cnts)* sizeof(uint))
for j in range(len(cnts)):
data[j] = <uint> cnts[j]
R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), <uint*> data)
Rs._R[0] = R
objs.append(_toString(Rs)[0])
return objs
def frPyObjects(pyobj, h, w):
# encode rle from a list of python objects
if type(pyobj) == np.ndarray:
objs = frBbox(pyobj, h, w)
elif type(pyobj) == list and len(pyobj[0]) == 4:
objs = frBbox(pyobj, h, w)
elif type(pyobj) == list and len(pyobj[0]) > 4:
objs = frPoly(pyobj, h, w)
elif type(pyobj) == list and type(pyobj[0]) == dict \
and 'counts' in pyobj[0] and 'size' in pyobj[0]:
objs = frUncompressedRLE(pyobj, h, w)
# encode rle from single python object
elif type(pyobj) == list and len(pyobj) == 4:
objs = frBbox([pyobj], h, w)[0]
elif type(pyobj) == list and len(pyobj) > 4:
objs = frPoly([pyobj], h, w)[0]
elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj:
objs = frUncompressedRLE([pyobj], h, w)[0]
else:
raise Exception('input type is not supported.')
return objs
\ No newline at end of file
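Once the extension above is compiled, the round trip looks roughly like this (a sketch assuming the compiled module is importable as `_mask`; the mask contents are arbitrary):

```python
import numpy as np
import _mask  # import path for the compiled extension is an assumption

# encode() expects uint8 masks of shape HxWxN in Fortran (column-major) order
masks = np.zeros((4, 4, 1), dtype=np.uint8, order='F')
masks[1:3, 1:3, 0] = 1

rles = _mask.encode(masks)    # -> [{'size': [4, 4], 'counts': b'...'}]
print(_mask.area(rles))       # -> [4]
print(_mask.toBbox(rles))     # -> [[1. 1. 2. 2.]], i.e. [x y w h]
assert (_mask.decode(rles) == masks).all()
```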
__author__ = 'tylin'
__version__ = '2.0'
# Interface for accessing the Microsoft COCO dataset.
# Microsoft COCO is a large image dataset designed for object detection,
# segmentation, and caption generation. pycocotools is a Python API that
# assists in loading, parsing and visualizing the annotations in COCO.
# Please visit http://mscoco.org/ for more information on COCO, including
# for the data, paper, and tutorials. The exact format of the annotations
# is also described on the COCO website. For example usage of the pycocotools
# please see pycocotools_demo.ipynb. In addition to this API, please download both
# the COCO images and annotations in order to run the demo.
# An alternative to using the API is to load the annotations directly
# into a Python dictionary.
# Using the API provides additional utility functions. Note that this API
# supports both *instance* and *caption* annotations. In the case of
# captions not all functions are defined (e.g. categories are undefined).
# The following API functions are defined:
# COCO - COCO api class that loads COCO annotation file and prepare data structures.
# decodeMask - Decode binary mask M encoded via run-length encoding.
# encodeMask - Encode binary mask M using run-length encoding.
# getAnnIds - Get ann ids that satisfy given filter conditions.
# getCatIds - Get cat ids that satisfy given filter conditions.
# getImgIds - Get img ids that satisfy given filter conditions.
# loadAnns - Load anns with the specified ids.
# loadCats - Load cats with the specified ids.
# loadImgs - Load imgs with the specified ids.
# annToMask - Convert segmentation in an annotation to binary mask.
# showAnns - Display the specified annotations.
# loadRes - Load algorithm results and create API for accessing them.
# download - Download COCO images from mscoco.org server.
# Throughout the API "ann"=annotation, "cat"=category, and "img"=image.
# Help on each function can be accessed by: "help COCO>function".
# See also COCO>decodeMask,
# COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds,
# COCO>getImgIds, COCO>loadAnns, COCO>loadCats,
# COCO>loadImgs, COCO>annToMask, COCO>showAnns
# Microsoft COCO Toolbox. version 2.0
# Data, paper, and tutorials available at: http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2014.
# Licensed under the Simplified BSD License [see bsd.txt]
import json
import time
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon
import numpy as np
import copy
import itertools
from . import mask as maskUtils
import os
from collections import defaultdict
import sys
PYTHON_VERSION = sys.version_info[0]
if PYTHON_VERSION == 2:
from urllib import urlretrieve
elif PYTHON_VERSION == 3:
from urllib.request import urlretrieve
def _isArrayLike(obj):
return hasattr(obj, '__iter__') and hasattr(obj, '__len__')
class COCO:
def __init__(self, annotation_file=None):
"""
Constructor of Microsoft COCO helper class for reading and visualizing annotations.
:param annotation_file (str): location of annotation file
:param image_folder (str): location to the folder that hosts images.
:return:
"""
# load dataset
self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict()
self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
if not annotation_file == None:
print('loading annotations into memory...')
tic = time.time()
dataset = json.load(open(annotation_file, 'r'))
assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset))
print('Done (t={:0.2f}s)'.format(time.time()- tic))
self.dataset = dataset
self.createIndex()
def createIndex(self):
# create index
print('creating index...')
anns, cats, imgs = {}, {}, {}
imgToAnns,catToImgs = defaultdict(list),defaultdict(list)
if 'annotations' in self.dataset:
for ann in self.dataset['annotations']:
imgToAnns[ann['image_id']].append(ann)
anns[ann['id']] = ann
if 'images' in self.dataset:
for img in self.dataset['images']:
imgs[img['id']] = img
if 'categories' in self.dataset:
for cat in self.dataset['categories']:
cats[cat['id']] = cat
if 'annotations' in self.dataset and 'categories' in self.dataset:
for ann in self.dataset['annotations']:
catToImgs[ann['category_id']].append(ann['image_id'])
print('index created!')
# create class members
self.anns = anns
self.imgToAnns = imgToAnns
self.catToImgs = catToImgs
self.imgs = imgs
self.cats = cats
def info(self):
"""
Print information about the annotation file.
:return:
"""
for key, value in self.dataset['info'].items():
print('{}: {}'.format(key, value))
def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
"""
Get ann ids that satisfy given filter conditions. default skips that filter
:param imgIds (int array) : get anns for given imgs
catIds (int array) : get anns for given cats
areaRng (float array) : get anns for given area range (e.g. [0 inf])
iscrowd (boolean) : get anns for given crowd label (False or True)
:return: ids (int array) : integer array of ann ids
"""
imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
catIds = catIds if _isArrayLike(catIds) else [catIds]
if len(imgIds) == len(catIds) == len(areaRng) == 0:
anns = self.dataset['annotations']
else:
if not len(imgIds) == 0:
lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns]
anns = list(itertools.chain.from_iterable(lists))
else:
anns = self.dataset['annotations']
anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds]
anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]]
if not iscrowd == None:
ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd]
else:
ids = [ann['id'] for ann in anns]
return ids
def getCatIds(self, catNms=[], supNms=[], catIds=[]):
"""
        Get cat ids that satisfy given filter conditions. default skips that filter.
:param catNms (str array) : get cats for given cat names
:param supNms (str array) : get cats for given supercategory names
:param catIds (int array) : get cats for given cat ids
:return: ids (int array) : integer array of cat ids
"""
catNms = catNms if _isArrayLike(catNms) else [catNms]
supNms = supNms if _isArrayLike(supNms) else [supNms]
catIds = catIds if _isArrayLike(catIds) else [catIds]
if len(catNms) == len(supNms) == len(catIds) == 0:
cats = self.dataset['categories']
else:
cats = self.dataset['categories']
cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name'] in catNms]
cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms]
cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id'] in catIds]
ids = [cat['id'] for cat in cats]
return ids
def getImgIds(self, imgIds=[], catIds=[]):
'''
Get img ids that satisfy given filter conditions.
:param imgIds (int array) : get imgs for given ids
:param catIds (int array) : get imgs with all given cats
:return: ids (int array) : integer array of img ids
'''
imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
catIds = catIds if _isArrayLike(catIds) else [catIds]
if len(imgIds) == len(catIds) == 0:
ids = self.imgs.keys()
else:
ids = set(imgIds)
for i, catId in enumerate(catIds):
if i == 0 and len(ids) == 0:
ids = set(self.catToImgs[catId])
else:
ids &= set(self.catToImgs[catId])
return list(ids)
def loadAnns(self, ids=[]):
"""
Load anns with the specified ids.
:param ids (int array) : integer ids specifying anns
:return: anns (object array) : loaded ann objects
"""
if _isArrayLike(ids):
return [self.anns[id] for id in ids]
elif type(ids) == int:
return [self.anns[ids]]
def loadCats(self, ids=[]):
"""
Load cats with the specified ids.
:param ids (int array) : integer ids specifying cats
:return: cats (object array) : loaded cat objects
"""
if _isArrayLike(ids):
return [self.cats[id] for id in ids]
elif type(ids) == int:
return [self.cats[ids]]
def loadImgs(self, ids=[]):
"""
        Load imgs with the specified ids.
:param ids (int array) : integer ids specifying img
:return: imgs (object array) : loaded img objects
"""
if _isArrayLike(ids):
return [self.imgs[id] for id in ids]
elif type(ids) == int:
return [self.imgs[ids]]
def showAnns(self, anns):
"""
Display the specified annotations.
:param anns (array of object): annotations to display
:return: None
"""
if len(anns) == 0:
return 0
if 'segmentation' in anns[0] or 'keypoints' in anns[0]:
datasetType = 'instances'
elif 'caption' in anns[0]:
datasetType = 'captions'
else:
raise Exception('datasetType not supported')
if datasetType == 'instances':
ax = plt.gca()
ax.set_autoscale_on(False)
polygons = []
color = []
for ann in anns:
c = (np.random.random((1, 3))*0.6+0.4).tolist()[0]
if 'segmentation' in ann:
if type(ann['segmentation']) == list:
# polygon
for seg in ann['segmentation']:
poly = np.array(seg).reshape((int(len(seg)/2), 2))
polygons.append(Polygon(poly))
color.append(c)
else:
# mask
t = self.imgs[ann['image_id']]
if type(ann['segmentation']['counts']) == list:
rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width'])
else:
rle = [ann['segmentation']]
m = maskUtils.decode(rle)
img = np.ones( (m.shape[0], m.shape[1], 3) )
if ann['iscrowd'] == 1:
color_mask = np.array([2.0,166.0,101.0])/255
if ann['iscrowd'] == 0:
color_mask = np.random.random((1, 3)).tolist()[0]
for i in range(3):
img[:,:,i] = color_mask[i]
ax.imshow(np.dstack( (img, m*0.5) ))
if 'keypoints' in ann and type(ann['keypoints']) == list:
# turn skeleton into zero-based index
sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1
kp = np.array(ann['keypoints'])
x = kp[0::3]
y = kp[1::3]
v = kp[2::3]
for sk in sks:
if np.all(v[sk]>0):
plt.plot(x[sk],y[sk], linewidth=3, color=c)
plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2)
plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2)
p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4)
ax.add_collection(p)
p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2)
ax.add_collection(p)
elif datasetType == 'captions':
for ann in anns:
print(ann['caption'])
def loadRes(self, resFile):
"""
Load result file and return a result api object.
:param resFile (str) : file name of result file
:return: res (obj) : result api object
"""
res = COCO()
res.dataset['images'] = [img for img in self.dataset['images']]
print('Loading and preparing results...')
tic = time.time()
        if type(resFile) == str or (PYTHON_VERSION == 2 and type(resFile) == unicode):
anns = json.load(open(resFile))
elif type(resFile) == np.ndarray:
anns = self.loadNumpyAnnotations(resFile)
else:
anns = resFile
        assert type(anns) == list, 'results is not an array of objects'
annsImgIds = [ann['image_id'] for ann in anns]
assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
'Results do not correspond to current coco set'
if 'caption' in anns[0]:
imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
for id, ann in enumerate(anns):
ann['id'] = id+1
elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
bb = ann['bbox']
x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]]
if not 'segmentation' in ann:
ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
ann['area'] = bb[2]*bb[3]
ann['id'] = id+1
ann['iscrowd'] = 0
elif 'segmentation' in anns[0]:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
# now only support compressed RLE format as segmentation results
ann['area'] = maskUtils.area([ann['segmentation']])[0]
if not 'bbox' in ann:
ann['bbox'] = maskUtils.toBbox([ann['segmentation']])[0]
ann['id'] = id+1
ann['iscrowd'] = 0
elif 'keypoints' in anns[0]:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
s = ann['keypoints']
x = s[0::3]
y = s[1::3]
x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y)
ann['area'] = (x1-x0)*(y1-y0)
ann['id'] = id + 1
ann['bbox'] = [x0,y0,x1-x0,y1-y0]
print('DONE (t={:0.2f}s)'.format(time.time()- tic))
res.dataset['annotations'] = anns
res.createIndex()
return res
def download(self, tarDir = None, imgIds = [] ):
'''
Download COCO images from mscoco.org server.
:param tarDir (str): COCO results directory name
imgIds (list): images to be downloaded
:return:
'''
if tarDir is None:
print('Please specify target directory')
return -1
if len(imgIds) == 0:
imgs = self.imgs.values()
else:
imgs = self.loadImgs(imgIds)
N = len(imgs)
if not os.path.exists(tarDir):
os.makedirs(tarDir)
for i, img in enumerate(imgs):
tic = time.time()
fname = os.path.join(tarDir, img['file_name'])
if not os.path.exists(fname):
urlretrieve(img['coco_url'], fname)
            print('downloaded {}/{} images (t={:0.1f}s)'.format(i+1, N, time.time()- tic))
def loadNumpyAnnotations(self, data):
"""
Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class}
:param data (numpy.ndarray)
:return: annotations (python nested list)
"""
print('Converting ndarray to lists...')
assert(type(data) == np.ndarray)
print(data.shape)
assert(data.shape[1] == 7)
N = data.shape[0]
ann = []
for i in range(N):
if i % 1000000 == 0:
print('{}/{}'.format(i,N))
ann += [{
'image_id' : int(data[i, 0]),
'bbox' : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ],
'score' : data[i, 5],
'category_id': int(data[i, 6]),
}]
return ann
def annToRLE(self, ann):
"""
Convert annotation which can be polygons, uncompressed RLE to RLE.
        :return: rle (run-length encoded mask)
"""
t = self.imgs[ann['image_id']]
h, w = t['height'], t['width']
segm = ann['segmentation']
if type(segm) == list:
# polygon -- a single object might consist of multiple parts
# we merge all parts into one mask rle code
rles = maskUtils.frPyObjects(segm, h, w)
rle = maskUtils.merge(rles)
elif type(segm['counts']) == list:
# uncompressed RLE
rle = maskUtils.frPyObjects(segm, h, w)
else:
# rle
rle = ann['segmentation']
return rle
def annToMask(self, ann):
"""
Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
:return: binary mask (numpy 2D array)
"""
rle = self.annToRLE(ann)
m = maskUtils.decode(rle)
return m
\ No newline at end of file
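A short, hypothetical session with the class above (the import path and annotation file are placeholders; any COCO-format instances file works the same way):

```python
from lib.pycocotools.coco import COCO  # import path is an assumption

coco = COCO('annotations/instances_minival.json')     # placeholder path
cat_ids = coco.getCatIds(catNms=['person'])
img_ids = coco.getImgIds(catIds=cat_ids)

img = coco.loadImgs(img_ids[0])[0]
ann_ids = coco.getAnnIds(imgIds=img['id'], catIds=cat_ids, iscrowd=None)
anns = coco.loadAnns(ann_ids)

# polygons / uncompressed RLE / compressed RLE all reduce to a binary HxW mask
mask = coco.annToMask(anns[0])
print(img['file_name'], mask.shape, mask.sum())
```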
__author__ = 'tsungyi'
import numpy as np
import datetime
import time
from collections import defaultdict
from . import mask as maskUtils
import copy
class COCOeval:
# Interface for evaluating detection on the Microsoft COCO dataset.
#
# The usage for CocoEval is as follows:
# cocoGt=..., cocoDt=... # load dataset and results
# E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
# E.params.recThrs = ...; # set parameters as desired
# E.evaluate(); # run per image evaluation
# E.accumulate(); # accumulate per image results
# E.summarize(); # display summary metrics of results
# For example usage see evalDemo.m and http://mscoco.org/.
#
# The evaluation parameters are as follows (defaults in brackets):
# imgIds - [all] N img ids to use for evaluation
# catIds - [all] K cat ids to use for evaluation
# iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation
# recThrs - [0:.01:1] R=101 recall thresholds for evaluation
# areaRng - [...] A=4 object area ranges for evaluation
# maxDets - [1 10 100] M=3 thresholds on max detections per image
# iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints'
# iouType replaced the now DEPRECATED useSegm parameter.
# useCats - [1] if true use category labels for evaluation
# Note: if useCats=0 category labels are ignored as in proposal scoring.
# Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
#
# evaluate(): evaluates detections on every image and every category and
# concats the results into the "evalImgs" with fields:
# dtIds - [1xD] id for each of the D detections (dt)
# gtIds - [1xG] id for each of the G ground truths (gt)
# dtMatches - [TxD] matching gt id at each IoU or 0
# gtMatches - [TxG] matching dt id at each IoU or 0
# dtScores - [1xD] confidence of each dt
# gtIgnore - [1xG] ignore flag for each gt
# dtIgnore - [TxD] ignore flag for each dt at each IoU
#
# accumulate(): accumulates the per-image, per-category evaluation
# results in "evalImgs" into the dictionary "eval" with fields:
# params - parameters used for evaluation
# date - date evaluation was performed
# counts - [T,R,K,A,M] parameter dimensions (see above)
# precision - [TxRxKxAxM] precision for every evaluation setting
# recall - [TxKxAxM] max recall for every evaluation setting
# Note: precision and recall==-1 for settings with no gt objects.
#
# See also coco, mask, pycocoDemo, pycocoEvalDemo
#
# Microsoft COCO Toolbox. version 2.0
# Data, paper, and tutorials available at: http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
# Licensed under the Simplified BSD License [see coco/license.txt]
def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'):
'''
Initialize CocoEval using coco APIs for gt and dt
:param cocoGt: coco object with ground truth annotations
:param cocoDt: coco object with detection results
:return: None
'''
if not iouType:
print('iouType not specified. use default iouType segm')
self.cocoGt = cocoGt # ground truth COCO API
self.cocoDt = cocoDt # detections COCO API
self.params = {} # evaluation parameters
self.evalImgs = defaultdict(list) # per-image per-category evaluation results [KxAxI] elements
self.eval = {} # accumulated evaluation results
self._gts = defaultdict(list) # gt for evaluation
self._dts = defaultdict(list) # dt for evaluation
self.params = Params(iouType=iouType) # parameters
self._paramsEval = {} # parameters for evaluation
self.stats = [] # result summarization
self.ious = {} # ious between all gts and dts
if not cocoGt is None:
self.params.imgIds = sorted(cocoGt.getImgIds())
self.params.catIds = sorted(cocoGt.getCatIds())
def _prepare(self):
'''
Prepare ._gts and ._dts for evaluation based on params
:return: None
'''
def _toMask(anns, coco):
# modify ann['segmentation'] by reference
for ann in anns:
rle = coco.annToRLE(ann)
ann['segmentation'] = rle
p = self.params
if p.useCats:
gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
else:
gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
# convert ground truth to mask if iouType == 'segm'
if p.iouType == 'segm':
_toMask(gts, self.cocoGt)
_toMask(dts, self.cocoDt)
# set ignore flag
for gt in gts:
gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0
gt['ignore'] = 'iscrowd' in gt and gt['iscrowd']
if p.iouType == 'keypoints':
gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore']
self._gts = defaultdict(list) # gt for evaluation
self._dts = defaultdict(list) # dt for evaluation
for gt in gts:
self._gts[gt['image_id'], gt['category_id']].append(gt)
for dt in dts:
self._dts[dt['image_id'], dt['category_id']].append(dt)
self.evalImgs = defaultdict(list) # per-image per-category evaluation results
self.eval = {} # accumulated evaluation results
def evaluate(self):
'''
Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
:return: None
'''
tic = time.time()
print('Running per image evaluation...')
p = self.params
# add backward compatibility if useSegm is specified in params
if not p.useSegm is None:
p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
print('Evaluate annotation type *{}*'.format(p.iouType))
p.imgIds = list(np.unique(p.imgIds))
if p.useCats:
p.catIds = list(np.unique(p.catIds))
p.maxDets = sorted(p.maxDets)
self.params=p
self._prepare()
# loop through images, area range, max detection number
catIds = p.catIds if p.useCats else [-1]
if p.iouType == 'segm' or p.iouType == 'bbox':
computeIoU = self.computeIoU
elif p.iouType == 'keypoints':
computeIoU = self.computeOks
self.ious = {(imgId, catId): computeIoU(imgId, catId) \
for imgId in p.imgIds
for catId in catIds}
evaluateImg = self.evaluateImg
maxDet = p.maxDets[-1]
self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet)
for catId in catIds
for areaRng in p.areaRng
for imgId in p.imgIds
]
self._paramsEval = copy.deepcopy(self.params)
toc = time.time()
print('DONE (t={:0.2f}s).'.format(toc-tic))
def computeIoU(self, imgId, catId):
p = self.params
if p.useCats:
gt = self._gts[imgId,catId]
dt = self._dts[imgId,catId]
else:
gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
if len(gt) == 0 and len(dt) ==0:
return []
inds = np.argsort([-d['score'] for d in dt], kind='mergesort')
dt = [dt[i] for i in inds]
if len(dt) > p.maxDets[-1]:
dt=dt[0:p.maxDets[-1]]
if p.iouType == 'segm':
g = [g['segmentation'] for g in gt]
d = [d['segmentation'] for d in dt]
elif p.iouType == 'bbox':
g = [g['bbox'] for g in gt]
d = [d['bbox'] for d in dt]
else:
raise Exception('unknown iouType for iou computation')
# compute iou between each dt and gt region
iscrowd = [int(o['iscrowd']) for o in gt]
ious = maskUtils.iou(d,g,iscrowd)
return ious
def computeOks(self, imgId, catId):
p = self.params
        # dimension here should be Nxm
gts = self._gts[imgId, catId]
dts = self._dts[imgId, catId]
inds = np.argsort([-d['score'] for d in dts], kind='mergesort')
dts = [dts[i] for i in inds]
if len(dts) > p.maxDets[-1]:
dts = dts[0:p.maxDets[-1]]
# if len(gts) == 0 and len(dts) == 0:
if len(gts) == 0 or len(dts) == 0:
return []
ious = np.zeros((len(dts), len(gts)))
sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0
vars = (sigmas * 2)**2
k = len(sigmas)
# compute oks between each detection and ground truth object
for j, gt in enumerate(gts):
            # create bounds for ignore regions (double the gt bbox)
g = np.array(gt['keypoints'])
xg = g[0::3]; yg = g[1::3]; vg = g[2::3]
k1 = np.count_nonzero(vg > 0)
bb = gt['bbox']
x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2
y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2
for i, dt in enumerate(dts):
d = np.array(dt['keypoints'])
xd = d[0::3]; yd = d[1::3]
if k1>0:
# measure the per-keypoint distance if keypoints visible
dx = xd - xg
dy = yd - yg
else:
# measure minimum distance to keypoints in (x0,y0) & (x1,y1)
z = np.zeros((k))
dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0)
dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0)
e = (dx**2 + dy**2) / vars / (gt['area']+np.spacing(1)) / 2
if k1 > 0:
e=e[vg > 0]
ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
return ious
def evaluateImg(self, imgId, catId, aRng, maxDet):
'''
perform evaluation for single category and image
:return: dict (single image results)
'''
p = self.params
if p.useCats:
gt = self._gts[imgId,catId]
dt = self._dts[imgId,catId]
else:
gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
if len(gt) == 0 and len(dt) ==0:
return None
for g in gt:
if g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]):
g['_ignore'] = 1
else:
g['_ignore'] = 0
# sort dt highest score first, sort gt ignore last
gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
gt = [gt[i] for i in gtind]
dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
dt = [dt[i] for i in dtind[0:maxDet]]
iscrowd = [int(o['iscrowd']) for o in gt]
# load computed ious
ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]
T = len(p.iouThrs)
G = len(gt)
D = len(dt)
gtm = np.zeros((T,G))
dtm = np.zeros((T,D))
gtIg = np.array([g['_ignore'] for g in gt])
dtIg = np.zeros((T,D))
if not len(ious)==0:
for tind, t in enumerate(p.iouThrs):
for dind, d in enumerate(dt):
# information about best match so far (m=-1 -> unmatched)
iou = min([t,1-1e-10])
m = -1
for gind, g in enumerate(gt):
# if this gt already matched, and not a crowd, continue
if gtm[tind,gind]>0 and not iscrowd[gind]:
continue
                        # if dt already matched to a regular gt, and we are now on an ignore gt, stop
if m>-1 and gtIg[m]==0 and gtIg[gind]==1:
break
# continue to next gt unless better match made
if ious[dind,gind] < iou:
continue
# if match successful and best so far, store appropriately
iou=ious[dind,gind]
m=gind
# if match made store id of match for both dt and gt
if m ==-1:
continue
dtIg[tind,dind] = gtIg[m]
dtm[tind,dind] = gt[m]['id']
gtm[tind,m] = d['id']
# set unmatched detections outside of area range to ignore
a = np.array([d['area']<aRng[0] or d['area']>aRng[1] for d in dt]).reshape((1, len(dt)))
dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0)))
# store results for given image and category
return {
'image_id': imgId,
'category_id': catId,
'aRng': aRng,
'maxDet': maxDet,
'dtIds': [d['id'] for d in dt],
'gtIds': [g['id'] for g in gt],
'dtMatches': dtm,
'gtMatches': gtm,
'dtScores': [d['score'] for d in dt],
'gtIgnore': gtIg,
'dtIgnore': dtIg,
}
def accumulate(self, p = None):
'''
Accumulate per image evaluation results and store the result in self.eval
:param p: input params for evaluation
:return: None
'''
print('Accumulating evaluation results...')
tic = time.time()
if not self.evalImgs:
print('Please run evaluate() first')
# allows input customized parameters
if p is None:
p = self.params
p.catIds = p.catIds if p.useCats == 1 else [-1]
T = len(p.iouThrs)
R = len(p.recThrs)
K = len(p.catIds) if p.useCats else 1
A = len(p.areaRng)
M = len(p.maxDets)
precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories
recall = -np.ones((T,K,A,M))
scores = -np.ones((T,R,K,A,M))
# create dictionary for future indexing
_pe = self._paramsEval
catIds = _pe.catIds if _pe.useCats else [-1]
setK = set(catIds)
setA = set(map(tuple, _pe.areaRng))
setM = set(_pe.maxDets)
setI = set(_pe.imgIds)
# get inds to evaluate
k_list = [n for n, k in enumerate(p.catIds) if k in setK]
m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
I0 = len(_pe.imgIds)
A0 = len(_pe.areaRng)
# retrieve E at each category, area range, and max number of detections
for k, k0 in enumerate(k_list):
Nk = k0*A0*I0
for a, a0 in enumerate(a_list):
Na = a0*I0
for m, maxDet in enumerate(m_list):
E = [self.evalImgs[Nk + Na + i] for i in i_list]
E = [e for e in E if not e is None]
if len(E) == 0:
continue
dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E])
# different sorting method generates slightly different results.
                    # mergesort is used to be consistent with the Matlab implementation.
inds = np.argsort(-dtScores, kind='mergesort')
dtScoresSorted = dtScores[inds]
dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds]
dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds]
gtIg = np.concatenate([e['gtIgnore'] for e in E])
npig = np.count_nonzero(gtIg==0 )
if npig == 0:
continue
tps = np.logical_and( dtm, np.logical_not(dtIg) )
fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) )
tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float)
fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float)
for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
tp = np.array(tp)
fp = np.array(fp)
nd = len(tp)
rc = tp / npig
pr = tp / (fp+tp+np.spacing(1))
fn = npig - tp
tn = nd - tp - fp - fn
q = np.zeros((R,))
ss = np.zeros((R,))
if nd:
recall[t,k,a,m] = rc[-1]
else:
recall[t,k,a,m] = 0
# numpy is slow without cython optimization for accessing elements
                        # using python lists instead gives a significant speed improvement
pr = pr.tolist(); q = q.tolist()
for i in range(nd-1, 0, -1):
if pr[i] > pr[i-1]:
pr[i-1] = pr[i]
inds = np.searchsorted(rc, p.recThrs, side='left')
try:
for ri, pi in enumerate(inds):
q[ri] = pr[pi]
ss[ri] = dtScoresSorted[pi]
except:
pass
precision[t,:,k,a,m] = np.array(q)
scores[t,:,k,a,m] = np.array(ss)
self.eval = {
'params': p,
'counts': [T, R, K, A, M],
'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
'precision': precision,
'recall': recall,
'scores': scores,
}
toc = time.time()
print('DONE (t={:0.2f}s).'.format( toc-tic))
def summarize(self):
'''
Compute and display summary metrics for evaluation results.
        Note this function can *only* be applied on the default parameter setting
'''
def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ):
p = self.params
iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
typeStr = '(AP)' if ap==1 else '(AR)'
iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
if iouThr is None else '{:0.2f}'.format(iouThr)
aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
if ap == 1:
# dimension of precision: [TxRxKxAxM]
s = self.eval['precision']
# IoU
if iouThr is not None:
t = np.where(iouThr == p.iouThrs)[0]
s = s[t]
s = s[:,:,:,aind,mind]
else:
# dimension of recall: [TxKxAxM]
s = self.eval['recall']
if iouThr is not None:
t = np.where(iouThr == p.iouThrs)[0]
s = s[t]
s = s[:,:,aind,mind]
if len(s[s>-1])==0:
mean_s = -1
else:
mean_s = np.mean(s[s>-1])
print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
return mean_s
def _summarizeDets():
stats = np.zeros((12,))
stats[0] = _summarize(1)
stats[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])
stats[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])
stats[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])
stats[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])
stats[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])
stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
stats[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])
stats[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])
stats[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])
return stats
def _summarizeKps():
stats = np.zeros((10,))
stats[0] = _summarize(1, maxDets=20)
stats[1] = _summarize(1, maxDets=20, iouThr=.5)
stats[2] = _summarize(1, maxDets=20, iouThr=.75)
stats[3] = _summarize(1, maxDets=20, areaRng='medium')
stats[4] = _summarize(1, maxDets=20, areaRng='large')
stats[5] = _summarize(0, maxDets=20)
stats[6] = _summarize(0, maxDets=20, iouThr=.5)
stats[7] = _summarize(0, maxDets=20, iouThr=.75)
stats[8] = _summarize(0, maxDets=20, areaRng='medium')
stats[9] = _summarize(0, maxDets=20, areaRng='large')
return stats
if not self.eval:
raise Exception('Please run accumulate() first')
iouType = self.params.iouType
if iouType == 'segm' or iouType == 'bbox':
summarize = _summarizeDets
elif iouType == 'keypoints':
summarize = _summarizeKps
self.stats = summarize()
def prs(self):
def _summarize(iouThr=None, areaRng='all', maxDets=100):
p = self.params
iStr = '[ IoU={:<9} | area={:>6} | maxDets={:>3} ]'
iouStr = '%0.2f:%0.2f' % (p.iouThrs[0], p.iouThrs[-1]) if iouThr is None else '%0.2f' % (iouThr)
areaStr = areaRng
maxDetsStr = '%d' % (maxDets)
aind = [i for i, aRng in enumerate(['all', 'small', 'medium', 'large']) if aRng == areaRng]
mind = [i for i, mDet in enumerate([1, 10, 100]) if mDet == maxDets]
prec = self.eval['precision']
if iouThr is not None:
t = np.where(iouThr == p.iouThrs)[0]
prec = prec[t]
prec = prec[:, :, :, aind, mind]
# [iou, rec, cls, 1] -> [rec]
prec = prec.mean(0).mean(1).flatten()
return iStr.format(iouStr, areaStr, maxDetsStr), prec
if not self.eval:
raise Exception('Please run accumulate() first')
prs = []
prs.append(_summarize()) # 0.5:0.95, all
prs.append(_summarize(iouThr=.5)) # 0.5, all
prs.append(_summarize(iouThr=.75)) # 0.75, all
prs.append(_summarize(areaRng='small')) # 0.5:0.95, small
prs.append(_summarize(iouThr=.5, areaRng='small')) # 0.5, small
prs.append(_summarize(iouThr=.75, areaRng='small')) # 0.75, small
prs.append(_summarize(areaRng='medium')) # 0.5:0.95, medium
prs.append(_summarize(iouThr=.5, areaRng='medium')) # 0.5, medium
prs.append(_summarize(iouThr=.75, areaRng='medium')) # 0.75, medium
prs.append(_summarize(areaRng='large')) # 0.5:0.95, large
prs.append(_summarize(iouThr=.5, areaRng='large')) # 0.5, large
prs.append(_summarize(iouThr=.75, areaRng='large')) # 0.75, large
return dict(prs)
    def __str__(self):
        # summarize() prints the metrics; __str__ must return a string, not None
        self.summarize()
        return ''
class Params:
'''
Params for coco evaluation api
'''
def setDetParams(self):
self.imgIds = []
self.catIds = []
# np.arange causes trouble. the data point on arange is slightly larger than the true value
        self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
        self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
self.maxDets = [1, 10, 100]
self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
self.areaRngLbl = ['all', 'small', 'medium', 'large']
self.useCats = 1
def setKpParams(self):
self.imgIds = []
self.catIds = []
# np.arange causes trouble. the data point on arange is slightly larger than the true value
        self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
        self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
self.maxDets = [20]
self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
self.areaRngLbl = ['all', 'medium', 'large']
self.useCats = 1
def __init__(self, iouType='segm'):
if iouType == 'segm' or iouType == 'bbox':
self.setDetParams()
elif iouType == 'keypoints':
self.setKpParams()
else:
raise Exception('iouType not supported')
self.iouType = iouType
# useSegm is deprecated
self.useSegm = None
\ No newline at end of file
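Tying the two APIs together, evaluation follows the recipe from the class comment above; a minimal sketch with placeholder file names (import paths are assumptions):

```python
from lib.pycocotools.coco import COCO
from lib.pycocotools.cocoeval import COCOeval

cocoGt = COCO('annotations/instances_minival.json')   # placeholder ground truth
cocoDt = cocoGt.loadRes('detections.json')            # placeholder results file

E = COCOeval(cocoGt, cocoDt, iouType='bbox')          # 'segm' and 'keypoints' also supported
E.evaluate()      # per-image, per-category matching at each IoU threshold
E.accumulate()    # fills the [T,R,K,A,M] precision/recall tensors
E.summarize()     # prints the 12 AP/AR numbers, also kept in E.stats
```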
Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies,
either expressed or implied, of the FreeBSD Project.
__author__ = 'tsungyi'
import lib.pycocotools._mask as _mask
# Interface for manipulating masks stored in RLE format.
#
# RLE is a simple yet efficient format for storing binary masks. RLE
# first divides a vector (or vectorized image) into a series of piecewise
# constant regions and then for each piece simply stores the length of
# that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
# be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
# (note that the odd counts are always the numbers of zeros). Instead of
# storing the counts directly, additional compression is achieved with a
# variable bitrate representation based on a common scheme called LEB128.
#
# Compression is greatest given large piecewise constant regions.
# Specifically, the size of the RLE is proportional to the number of
# *boundaries* in M (or for an image the number of boundaries in the y
# direction). Assuming fairly simple shapes, the RLE representation is
# O(sqrt(n)) where n is number of pixels in the object. Hence space usage
# is substantially lower, especially for large simple objects (large n).
#
# Many common operations on masks can be computed directly using the RLE
# (without need for decoding). This includes computations such as area,
# union, intersection, etc. All of these operations are linear in the
# size of the RLE, in other words they are O(sqrt(n)) where n is the area
# of the object. Computing these operations on the original mask is O(n).
# Thus, using the RLE can result in substantial computational savings.
#
# The following API functions are defined:
# encode - Encode binary masks using RLE.
# decode - Decode binary masks encoded via RLE.
# merge - Compute union or intersection of encoded masks.
# iou - Compute intersection over union between masks.
# area - Compute area of encoded masks.
# toBbox - Get bounding boxes surrounding encoded masks.
# frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
#
# Usage:
# Rs = encode( masks )
# masks = decode( Rs )
# R = merge( Rs, intersect=false )
# o = iou( dt, gt, iscrowd )
# a = area( Rs )
# bbs = toBbox( Rs )
# Rs = frPyObjects( [pyObjects], h, w )
#
# In the API the following formats are used:
# Rs - [dict] Run-length encoding of binary masks
# R - dict Run-length encoding of binary mask
# masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
# iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
# bbs - [nx4] Bounding box(es) stored as [x y w h]
# poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
# dt,gt - May be either bounding boxes or encoded masks
# Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
#
# Finally, a note about the intersection over union (iou) computation.
# The standard iou of a ground truth (gt) and detected (dt) object is
# iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
# For "crowd" regions, we use a modified criteria. If a gt object is
# marked as "iscrowd", we allow a dt to match any subregion of the gt.
# Choosing gt' in the crowd gt that best matches the dt can be done using
# gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
# iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
# For crowd gt regions we use this modified criteria above for the iou.
#
# To compile run "python setup.py build_ext --inplace"
# Please do not contact us for help with compiling.
#
# Microsoft COCO Toolbox. version 2.0
# Data, paper, and tutorials available at: http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
# Licensed under the Simplified BSD License [see coco/license.txt]
encode = _mask.encode
iou = _mask.iou
merge = _mask.merge
area = _mask.area
toBbox = _mask.toBbox
frPyObjects = _mask.frPyObjects
def decode(rleObjs):
    # _mask.decode expects a list of RLEs; COCO.annToRLE returns a single
    # RLE dict, so accept both and unwrap the single-mask result
    if type(rleObjs) == list:
        return _mask.decode(rleObjs)
    return _mask.decode([rleObjs])[:, :, 0]
\ No newline at end of file
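To make the counts convention above concrete, here is a hypothetical round trip for the M=[0 0 1 1 1 0 1] example (import path assumed from the `lib.pycocotools._mask` import above):

```python
import numpy as np
from lib.pycocotools import mask as maskUtils  # import path is an assumption

# M = [0 0 1 1 1 0 1] as a 7x1x1 column-major mask; its RLE counts are [2 3 1 1]
M = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8).reshape((7, 1, 1), order='F')
rle = maskUtils.encode(M)[0]
print(rle['size'])              # -> [7, 1]
print(maskUtils.area([rle]))    # -> [4]; rleArea sums only the runs of ones
assert (maskUtils.decode([rle]) == M).all()
```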
/**************************************************************************
* Microsoft COCO Toolbox. version 2.0
* Data, paper, and tutorials available at: http://mscoco.org/
* Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
* Licensed under the Simplified BSD License [see coco/license.txt]
**************************************************************************/
#include "maskApi.h"
#include <math.h>
#include <stdlib.h>
uint umin( uint a, uint b ) { return (a<b) ? a : b; }
uint umax( uint a, uint b ) { return (a>b) ? a : b; }
void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) {
R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m);
if(cnts) for(siz j=0; j<m; j++) R->cnts[j]=cnts[j];
}
void rleFree( RLE *R ) {
free(R->cnts); R->cnts=0;
}
void rlesInit( RLE **R, siz n ) {
*R = (RLE*) malloc(sizeof(RLE)*n);
for(siz i=0; i<n; i++) rleInit((*R)+i,0,0,0,0);
}
void rlesFree( RLE **R, siz n ) {
for(siz i=0; i<n; i++) rleFree((*R)+i); free(*R); *R=0;
}
void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) {
siz i, j, k, a=w*h; uint c, *cnts; byte p;
cnts = malloc(sizeof(uint)*(a+1));
for(i=0; i<n; i++) {
const byte *T=M+a*i; k=0; p=0; c=0;
for(j=0; j<a; j++) { if(T[j]!=p) { cnts[k++]=c; c=0; p=T[j]; } c++; }
cnts[k++]=c; rleInit(R+i,h,w,k,cnts);
}
free(cnts);
}
void rleDecode( const RLE *R, byte *M, siz n ) {
for( siz i=0; i<n; i++ ) {
byte v=0; for( siz j=0; j<R[i].m; j++ ) {
for( siz k=0; k<R[i].cnts[j]; k++ ) *(M++)=v; v=!v; }}
}
void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ) {
uint *cnts, c, ca, cb, cc, ct; bool v, va, vb, vp;
siz i, a, b, h=R[0].h, w=R[0].w, m=R[0].m; RLE A, B;
if(n==0) { rleInit(M,0,0,0,0); return; }
if(n==1) { rleInit(M,h,w,m,R[0].cnts); return; }
cnts = malloc(sizeof(uint)*(h*w+1));
for( a=0; a<m; a++ ) cnts[a]=R[0].cnts[a];
for( i=1; i<n; i++ ) {
B=R[i]; if(B.h!=h||B.w!=w) { h=w=m=0; break; }
rleInit(&A,h,w,m,cnts); ca=A.cnts[0]; cb=B.cnts[0];
v=va=vb=0; m=0; a=b=1; cc=0; ct=1;
while( ct>0 ) {
c=umin(ca,cb); cc+=c; ct=0;
ca-=c; if(!ca && a<A.m) { ca=A.cnts[a++]; va=!va; } ct+=ca;
cb-=c; if(!cb && b<B.m) { cb=B.cnts[b++]; vb=!vb; } ct+=cb;
vp=v; if(intersect) v=va&&vb; else v=va||vb;
if( v!=vp||ct==0 ) { cnts[m++]=cc; cc=0; }
}
rleFree(&A);
}
rleInit(M,h,w,m,cnts); free(cnts);
}
void rleArea( const RLE *R, siz n, uint *a ) {
for( siz i=0; i<n; i++ ) {
a[i]=0; for( siz j=1; j<R[i].m; j+=2 ) a[i]+=R[i].cnts[j]; }
}
void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) {
siz g, d; BB db, gb; bool crowd;
db=malloc(sizeof(double)*m*4); rleToBbox(dt,db,m);
gb=malloc(sizeof(double)*n*4); rleToBbox(gt,gb,n);
bbIou(db,gb,m,n,iscrowd,o); free(db); free(gb);
for( g=0; g<n; g++ ) for( d=0; d<m; d++ ) if(o[g*m+d]>0) {
crowd=iscrowd!=NULL && iscrowd[g];
if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; }
siz ka, kb, a, b; uint c, ca, cb, ct, i, u; bool va, vb;
ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0;
cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1;
while( ct>0 ) {
c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0;
ca-=c; if(!ca && a<ka) { ca=dt[d].cnts[a++]; va=!va; } ct+=ca;
cb-=c; if(!cb && b<kb) { cb=gt[g].cnts[b++]; vb=!vb; } ct+=cb;
}
if(i==0) u=1; else if(crowd) rleArea(dt+d,1,&u);
o[g*m+d] = (double)i/(double)u;
}
}
void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) {
double h, w, i, u, ga, da; siz g, d; bool crowd;
for( g=0; g<n; g++ ) {
BB G=gt+g*4; ga=G[2]*G[3]; crowd=iscrowd!=NULL && iscrowd[g];
for( d=0; d<m; d++ ) {
BB D=dt+d*4; da=D[2]*D[3]; o[g*m+d]=0;
w=fmin(D[2]+D[0],G[2]+G[0])-fmax(D[0],G[0]); if(w<=0) continue;
h=fmin(D[3]+D[1],G[3]+G[1])-fmax(D[1],G[1]); if(h<=0) continue;
i=w*h; u = crowd ? da : da+ga-i; o[g*m+d]=i/u;
}
}
}
void rleToBbox( const RLE *R, BB bb, siz n ) {
for( siz i=0; i<n; i++ ) {
uint h, w, x, y, xs, ys, xe, ye, cc, t; siz j, m;
h=(uint)R[i].h; w=(uint)R[i].w; m=R[i].m;
m=((siz)(m/2))*2; xs=w; ys=h; xe=ye=0; cc=0;
if(m==0) { bb[4*i+0]=bb[4*i+1]=bb[4*i+2]=bb[4*i+3]=0; continue; }
for( j=0; j<m; j++ ) {
cc+=R[i].cnts[j]; t=cc-j%2; y=t%h; x=(t-y)/h;
xs=umin(xs,x); xe=umax(xe,x); ys=umin(ys,y); ye=umax(ye,y);
}
bb[4*i+0]=xs; bb[4*i+2]=xe-xs+1;
bb[4*i+1]=ys; bb[4*i+3]=ye-ys+1;
}
}
void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) {
for( siz i=0; i<n; i++ ) {
double xs=bb[4*i+0], xe=xs+bb[4*i+2];
double ys=bb[4*i+1], ye=ys+bb[4*i+3];
double xy[8] = {xs,ys,xs,ye,xe,ye,xe,ys};
rleFrPoly( R+i, xy, 4, h, w );
}
}
int uintCompare(const void *a, const void *b) {
uint c=*((uint*)a), d=*((uint*)b); return c>d?1:c<d?-1:0;
}
void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) {
// upsample and get discrete points densely along entire boundary
siz j, m=0; double scale=5; int *x, *y, *u, *v; uint *a, *b;
x=malloc(sizeof(int)*(k+1)); y=malloc(sizeof(int)*(k+1));
for(j=0; j<k; j++) x[j]=(int)(scale*xy[j*2+0]+.5); x[k]=x[0];
for(j=0; j<k; j++) y[j]=(int)(scale*xy[j*2+1]+.5); y[k]=y[0];
for(j=0; j<k; j++) m+=umax(abs(x[j]-x[j+1]),abs(y[j]-y[j+1]))+1;
u=malloc(sizeof(int)*m); v=malloc(sizeof(int)*m); m=0;
for( j=0; j<k; j++ ) {
int xs=x[j], xe=x[j+1], ys=y[j], ye=y[j+1], dx, dy, t;
bool flip; double s; dx=abs(xe-xs); dy=abs(ys-ye);
flip = (dx>=dy && xs>xe) || (dx<dy && ys>ye);
if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; }
s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy;
if(dx>=dy) for( int d=0; d<=dx; d++ ) {
t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++;
} else for( int d=0; d<=dy; d++ ) {
t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++;
}
}
// get points along y-boundary and downsample
free(x); free(y); k=m; m=0; double xd, yd;
x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k);
for( j=1; j<k; j++ ) if(u[j]!=u[j-1]) {
xd=(double)(u[j]<u[j-1]?u[j]:u[j]-1); xd=(xd+.5)/scale-.5;
if( floor(xd)!=xd || xd<0 || xd>w-1 ) continue;
yd=(double)(v[j]<v[j-1]?v[j]:v[j-1]); yd=(yd+.5)/scale-.5;
if(yd<0) yd=0; else if(yd>h) yd=h; yd=ceil(yd);
x[m]=(int) xd; y[m]=(int) yd; m++;
}
// compute rle encoding given y-boundary points
k=m; a=malloc(sizeof(uint)*(k+1));
for( j=0; j<k; j++ ) a[j]=(uint)(x[j]*(int)(h)+y[j]);
a[k++]=(uint)(h*w); free(u); free(v); free(x); free(y);
qsort(a,k,sizeof(uint),uintCompare); uint p=0;
for( j=0; j<k; j++ ) { uint t=a[j]; a[j]-=p; p=t; }
b=malloc(sizeof(uint)*k); j=m=0; b[m++]=a[j++];
while(j<k) if(a[j]>0) b[m++]=a[j++]; else {
j++; if(j<k) b[m-1]+=a[j++]; }
rleInit(R,h,w,m,b); free(a); free(b);
}
char* rleToString( const RLE *R ) {
// Similar to LEB128 but using 6 bits/char and ascii chars 48-111.
siz i, m=R->m, p=0; long x; bool more;
char *s=malloc(sizeof(char)*m*6);
for( i=0; i<m; i++ ) {
x=(long) R->cnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1;
while( more ) {
char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0;
if(more) c |= 0x20; c+=48; s[p++]=c;
}
}
s[p]=0; return s;
}
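/* A worked example of rleToString (a sketch, not part of the API): counts
 * that fit in a single 5-bit chunk encode as one char with the continuation
 * bit (0x20) clear, e.g. counts [0,5] -> '0' (0+48) and '5' (5+48), giving
 * the string "05". From the fourth count on (i>2), the stored value is the
 * delta cnts[i]-cnts[i-2], which may be negative; bit 0x10 of the final
 * chunk carries the sign, and rleFrString sign-extends it on decode. */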
void rleFrString( RLE *R, char *s, siz h, siz w ) {
siz m=0, p=0, k; long x; bool more; uint *cnts;
while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0;
while( s[p] ) {
x=0; k=0; more=1;
while( more ) {
char c=s[p]-48; x |= (c & 0x1f) << 5*k;
more = c & 0x20; p++; k++;
if(!more && (c & 0x10)) x |= -1 << 5*k;
}
if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x;
}
rleInit(R,h,w,m,cnts); free(cnts);
}
/**************************************************************************
* Microsoft COCO Toolbox. version 2.0
* Data, paper, and tutorials available at: http://mscoco.org/
* Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
* Licensed under the Simplified BSD License [see coco/license.txt]
**************************************************************************/
#pragma once
#include <stdbool.h>
typedef unsigned int uint;
typedef unsigned long siz;
typedef unsigned char byte;
typedef double* BB;
typedef struct { siz h, w, m; uint *cnts; } RLE;
// Initialize/destroy RLE.
void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
void rleFree( RLE *R );
// Initialize/destroy RLE array.
void rlesInit( RLE **R, siz n );
void rlesFree( RLE **R, siz n );
// Encode binary masks using RLE.
void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
// Decode binary masks encoded via RLE.
void rleDecode( const RLE *R, byte *mask, siz n );
// Compute union or intersection of encoded masks.
void rleMerge( const RLE *R, RLE *M, siz n, bool intersect );
// Compute area of encoded masks.
void rleArea( const RLE *R, siz n, uint *a );
// Compute intersection over union between masks.
void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
// Compute intersection over union between bounding boxes.
void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
// Get bounding boxes surrounding encoded masks.
void rleToBbox( const RLE *R, BB bb, siz n );
// Convert bounding boxes to encoded masks.
void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
// Convert polygon to encoded mask.
void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
// Get compressed string representation of encoded mask.
char* rleToString( const RLE *R );
// Convert from compressed string representation of encoded mask.
void rleFrString( RLE *R, char *s, siz h, siz w );
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from lib.pycocotools.mask import encode as encode_masks, \
decode as decode_masks, frPyObjects
def decode_rle(R):
    N = len(R['counts'])
    M = np.zeros((R['size'][0] * R['size'][1],), dtype=np.uint8)
    n = 0
    val = 1
    for pos in range(N):
        val = not val
        for c in range(R['counts'][pos]):
            M[n] = val
            n += 1
    return M.reshape(R['size'], order='F')
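# A minimal sketch (hypothetical counts, assuming an uncompressed COCO-style
# RLE dict) of decode_rle above: runs alternate starting with zeros, and the
# flat mask is reshaped in column-major (Fortran) order.
#
#   >>> decode_rle({'counts': [2, 3, 1], 'size': [3, 2]})
#   array([[0, 1],
#          [0, 1],
#          [1, 0]], dtype=uint8)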
def mask_poly2im(polys, im_height, im_width):
return frPyObjects(polys, im_height, im_width)
def mask_coco2im(coco_masks, im_height, im_width):
im_masks = []
for i, ann in enumerate(coco_masks):
if isinstance(ann, list):
m = mask_poly2im(ann, im_height, im_width)
elif isinstance(ann, np.ndarray):
m = ann.astype(np.uint8)
else:
raise TypeError('Unknown type of mask: {}'.format(type(ann)))
im_masks.append(m)
return im_masks
def mask_rle2im(rle_masks, im_height, im_width):
coco_masks = [{'counts': rle, 'size': [im_height, im_width]} for rle in rle_masks]
coco_masks = decode_masks(coco_masks)
coco_masks = coco_masks.transpose((2, 0, 1))
return mask_coco2im(coco_masks, im_height, im_width)
def mask_bin2rle(bin_masks):
rle_masks = []
for bin_mask in bin_masks:
if bin_mask is None:
rle_mask = ''
else:
rle_mask = encode_masks(np.array(np.stack([bin_mask], axis=2), order='F'))[0]['counts']
rle_masks.append(rle_mask)
return rle_masks
def mask_poly2rle(segmentations, im_height, im_width):
masks = []
for polys in segmentations:
mask = mask_poly2im(polys, im_height, im_width)
masks.append(mask[0]['counts'])
return masks
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.retinanet.layers.anchor_target_layer import AnchorTargetLayer
from lib.retinanet.layers.proposal_layer import ProposalLayer
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = []
for stride in self.strides:
sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.base_anchors.append(generate_anchors_v2(
stride=stride, ratios=self.ratios, sizes=sizes))
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \
.format(num_images, len(gt_boxes_wide)))
# Generate proposals from shifted anchors
all_anchors = []; total_anchors = 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors)
total_anchors += anchors.shape[0]
all_anchors = np.concatenate(all_anchors, axis=0)
# label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
anchors = all_anchors
inds_inside = np.arange(all_anchors.shape[0])
num_inside = len(inds_inside)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float))
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
# fg label: for each gt, anchor with highest overlap
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
gt_inds = argmax_overlaps[gt_argmax_overlaps]
labels[gt_argmax_overlaps] = gt_boxes[gt_inds, 4]
# fg label: above threshold IOU
inds = max_overlaps >= cfg.RETINANET.POSITIVE_OVERLAP
gt_inds = argmax_overlaps[inds]
labels[inds] = gt_boxes[gt_inds, 4]
fg_inds = np.where(labels > 0)[0]
# bg label: below threshold IOU
labels[max_overlaps < cfg.RETINANET.NEGATIVE_OVERLAP] = 0
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4])
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[fg_inds, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1.0)
labels_wide[ix, inds_inside] = labels
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
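# A minimal NumPy sketch (hypothetical A=9, K=100) of the broadcast used in
# forward() above: A base anchors added to K grid shifts yield K*A anchors.
#
#   >>> import numpy as np
#   >>> base = np.zeros((1, 9, 4))                            # (1, A, 4)
#   >>> shifts = np.zeros((1, 100, 4)).transpose((1, 0, 2))   # (K, 1, 4)
#   >>> (base + shifts).shape
#   (100, 9, 4)
#
# The sum is then transposed to (A, K, 4) and flattened to (A * K, 4), so
# anchors are grouped by anchor type first, matching the prediction layout.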
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.bbox_transform import bbox_transform_inv
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box.
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = []
for stride in self.strides:
sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.base_anchors.append(generate_anchors_v2(
stride=stride, ratios=self.ratios, sizes=sizes))
def forward(self, features, cls_prob, bbox_pred, ims_info):
# Get resources
num_images = ims_info.shape[0]
cls_prob, bbox_pred = cls_prob.numpy(True), bbox_pred.numpy(True)
lvl_info = [features[i].shape[-2:] for i in range(len(self.strides))]
if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images:
logger.fatal('Incorrect num of images: {}'.format(num_images))
# Prepare for the outputs
batch_probs = cls_prob
batch_deltas = bbox_pred.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
batch_detections = []
# Extract Detections separately
for ix in range(num_images):
im_scale = ims_info[ix, 2]
if cfg.RETINANET.SOFTMAX: P = batch_probs[ix, 1:, :]
else: P = batch_probs[ix] # [num_classes - 1, n]
D = batch_deltas[ix] # [n, 4]
anchor_pos = 0
for lvl, (H, W) in enumerate(lvl_info):
A, K = self.base_anchors[lvl].shape[0], H * W
num_anchors = A * K
prob = P[:, anchor_pos : anchor_pos + num_anchors]
deltas = D[anchor_pos : anchor_pos + num_anchors]
anchor_pos += num_anchors
prob_ravel = prob.ravel()
candidate_inds = np.where(prob_ravel > cfg.TEST.SCORE_THRESH)[0]
if len(candidate_inds) == 0: continue
pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
inds = np.argpartition(
prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
inds = candidate_inds[inds]
prob_4d = prob.reshape((prob.shape[0], A, H, W))
inds_2d = np.array(np.unravel_index(inds, prob.shape)).transpose()
inds_4d = np.array(np.unravel_index(inds, prob_4d.shape)).transpose()
classes, anchor_ids = inds_2d[:, 0], inds_2d[:, 1]
a, y, x = inds_4d[:, 1], inds_4d[:, 2], inds_4d[:, 3]
scores = prob[classes, anchor_ids]
deltas = deltas[anchor_ids]
anchors = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
anchors = (anchors * self.strides[lvl]) + self.base_anchors[lvl][a, :]
pred_boxes = bbox_transform_inv(anchors, deltas)
pred_boxes /= im_scale
# {im_idx, x1, y1, x2, y2, score, cls}
detections = np.zeros((pred_boxes.shape[0], 7), dtype=np.float32)
detections[:, 0], detections[:, 1:5] = ix, pred_boxes
detections[:, 5], detections[:, 6] = scores, classes + 1
batch_detections.append(detections)
# Merge Detections into a blob
batch_detections = np.vstack(batch_detections) \
if len(batch_detections) > 0 else \
np.zeros((1, 7), dtype=np.float32)
return batch_detections
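# A minimal sketch (hypothetical scores) of the pre-NMS top-N selection in
# forward() above: np.argpartition leaves the n largest entries (unordered)
# in the last n slots, which avoids a full sort when only membership in the
# top-n matters.
#
#   >>> import numpy as np
#   >>> scores = np.array([0.1, 0.9, 0.4, 0.7])
#   >>> scores[np.argpartition(scores, -2)[-2:]]   # 0.9 and 0.7, any order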
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
    import cPickle
except ImportError:
    import pickle as cPickle
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.image import scale_image
from lib.utils.bbox_transform import clip_boxes
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer
from lib.utils.blob import im_list_to_blob
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
blobs['data'] = torch.from_numpy(blobs['data']).cuda(cfg.GPU_ID)
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
results = outputs['detections']
detections_wide = []
for im_idx in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == im_idx)[0]
detections = results[indices, 1:]
detections[:, :4] = clip_boxes(detections[:, :4], raw_image.shape)
detections_wide.append(detections)
return np.vstack(detections_wide) \
if len(detections_wide) > 1 else detections_wide[0]
def ims_detect(net, raw_images):
"""Detect images, with single or multiple scales.
"""
# Prepare images
ims, ims_scale = scale_image(raw_images[0])
ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext; ims_scale += ims_scale_ext
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[2:4]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
# Do Forward
net.forward(**blobs)()
# Decode results
results = net.blobs['detections'].data.get_value()
detections_wide = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)):
j = i % len(ims_shape)
indices = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:]
detections[:, :4] = clip_boxes(detections[:, :4], ims_shape[j])
detections_wide[j].append(detections)
for j in range(len(ims_shape)):
detections_wide[j] = np.vstack(detections_wide[j]) \
if len(detections_wide[j]) > 1 else detections_wide[j][0]
return detections_wide
def test_net(net, server):
classes, num_images, num_classes = \
server.classes, server.num_images, server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect' : Timer(), 'misc' : Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue
image_id, raw_image = server.get_image()
image_ids.append(image_id)
raw_images.append(raw_image)
# Run detecting on specific scales
_t['im_detect'].tic()
if cfg.TEST.IMS_PER_BATCH > 1:
results = ims_detect(net, raw_images)
else:
results = [im_detect(net, raw_images[0])]
_t['im_detect'].toc()
# Post-Processing
_t['misc'].tic()
for item_idx, detections in enumerate(results):
i = batch_idx + item_idx
boxes_this_image = [[]]
# {x1, y1, x2, y2, score, cls}
detections = np.array(detections)
for j in range(1, num_classes):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4]
cls_scores = detections[cls_indices, 4]
cls_dets = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])).\
astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(cls_dets, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA)
else: keep = nms(cls_dets, cfg.TEST.NMS)
cls_dets = cls_dets[keep, :]
all_boxes[j][i] = cls_dets
boxes_this_image.append(cls_dets)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_images[item_idx], classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_ids[item_idx]))
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
.format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections')
server.evaluate_detections(all_boxes)
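# A minimal sketch (hypothetical scores) of the DETECTIONS_PER_IM cap in
# test_net above: pool the scores of all classes, take the k-th highest as
# a threshold, then drop every class box scoring below it.
#
#   >>> import numpy as np
#   >>> image_scores = np.array([0.9, 0.8, 0.3, 0.7, 0.1])
#   >>> image_thresh = np.sort(image_scores)[-3]   # keep at most 3 boxes
#   >>> image_scores[image_scores >= image_thresh]
#   array([0.9, 0.8, 0.7])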
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.ssd.layers.data_layer import DataLayer
from lib.ssd.layers.prior_box_layer import PriorBoxLayer
from lib.ssd.layers.multibox_match_layer import MultiBoxMatchLayer
from lib.ssd.layers.hard_mining_layer import HardMiningLayer
from lib.ssd.layers.multibox_target_layer import MultiBoxTargetLayer
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from multiprocessing import Process
from lib.core.config import cfg
class BlobFetcher(Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self.Q_in = self.Q_out = None
self.daemon = True
def get(self):
num_images = cfg.TRAIN.IMS_PER_BATCH
target_h = cfg.SSD.RESIZE.HEIGHT; target_w = cfg.SSD.RESIZE.WIDTH
ims_blob = np.zeros(shape=(num_images, target_h, target_w, 3), dtype=np.uint8)
gt_boxes_wide = []
for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, gt_boxes = self.Q_in.get()
ims_blob[ix, :, :, :] = im
            # Encode boxes by appending the image index
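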
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), dtype=np.float32)
im_boxes[:, 0:gt_boxes.shape[1]] = gt_boxes
im_boxes[:, -1] = ix
gt_boxes_wide.append(im_boxes)
return {'data': ims_blob, 'gt_boxes': np.concatenate(gt_boxes_wide, axis=0)}
def run(self):
while True: self.Q_out.put(self.get())
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import pprint
from multiprocessing import Queue
import dragon.core.mpi as mpi
import lib.utils.logger as logger
from lib.faster_rcnn.data.data_reader import DataReader
from lib.ssd.data.data_transformer import DataTransformer
from lib.ssd.data.blob_fetcher import BlobFetcher
class DataBatch(object):
"""DataBatch aims to prefetch data by ``Triple-Buffering``.
    It takes full advantage of the processes and threads of Python,
    which provides a remarkable I/O speedup for scalable distributed training.
"""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
source : str
The path of database.
        multiple_nodes : boolean
Whether to split data for multiple parallel nodes. Default is ``False``.
shuffle : boolean
Whether to shuffle the data. Default is ``False``.
num_chunks : int
The number of chunks to split. Default is ``2048``.
chunk_size : int
            The size (MB) of each chunk. Default is ``-1`` (refer to ``num_chunks``).
batch_size : int
The size of a training batch.
partition : boolean
Whether to partition batch. Default is ``False``.
prefetch : int
The prefetch count. Default is ``5``.
"""
super(DataBatch, self).__init__()
# Init mpi
global_rank = 0; local_rank = 0; group_size = 1
if mpi.Is_Init():
idx, group = mpi.AllowParallel()
if idx != -1: # DataParallel
global_rank = mpi.Rank()
group_size = len(group)
for i, node in enumerate(group):
if global_rank == node: local_rank = i
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
        self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
        # I/O-Aware Policy
if self._num_transformers == -1:
self._num_transformers = 3
# Add 1 transformer for color augmentation
if kwargs.get('color_augmentation', False):
self._num_transformers += 1
# Add 1 transformer for random scale
if kwargs.get('max_random_scale', 1.0) - \
kwargs.get('min_random_scale', 1.0) != 0:
self._num_transformers += 1
self._num_transformers = min(self._num_transformers, self._max_transformers)
self._batch_size = kwargs.get('batch_size', 100)
self._partition = kwargs.get('partition', False)
if self._partition:
self._batch_size = int(self._batch_size / kwargs['group_size'])
# Init queues
self.Q_level_1 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q_level_2 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q_level_3 = Queue(self._prefetch * self._num_readers)
# Init readers
self._readers = []
for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].Q_out = self.Q_level_1
for i in range(self._num_readers):
num_parts = self._num_readers
part_idx = i
if self._readers[i]._multiple_nodes or \
self._readers[i]._use_shuffle:
num_parts *= group_size
part_idx += local_rank * self._num_readers
self._readers[i]._num_parts = num_parts
self._readers[i]._part_idx = part_idx
self._readers[i]._random_seed += part_idx
self._readers[i].start()
time.sleep(0.1)
# Init transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._random_seed += (i + local_rank * self._num_transformers)
transformer.Q_in = self.Q_level_1
transformer.Q_out = self.Q_level_2
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Init blob fetchers
self._fetchers = []
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.Q_in = self.Q_level_2
fetcher.Q_out = self.Q_level_3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
        # Prevent echoing from multiple nodes
if local_rank == 0: self.echo()
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate(self._fetchers)
logger.info('Terminating BlobFetcher ......')
terminate(self._transformers)
logger.info('Terminating DataTransformer ......')
terminate(self._readers)
            logger.info('Terminating DataReader ......')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q_level_3.get()
def echo(self):
"""Print I/O Information.
Returns
-------
None
"""
print('---------------------------------------------------------')
print('BatchFetcher({} Threads), Using config:'.format(
self._num_readers + self._num_transformers + self._num_fetchers))
params = {'queue_size': self._prefetch,
'n_readers': self._num_readers,
'n_transformers': self._num_transformers,
'n_fetchers': self._num_fetchers}
pprint.pprint(params)
print('---------------------------------------------------------')
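# A sketch of the triple-buffered pipeline wired up in __init__ above: each
# stage runs in its own process(es) and talks only through bounded queues,
# so disk I/O, augmentation, and batch packing overlap with training.
#
#   DataReader --Q_level_1--> DataTransformer --Q_level_2--> BlobFetcher --Q_level_3--> get()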
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
import numpy.random as npr
from multiprocessing import Process
from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.ssd.data.preprocessing import *
import lib.utils.logger as logger
class DataTransformer(Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._distorter = Distortor()
self._expander = Expander()
self._sampler = Sampler(cfg.SSD.SAMPLERS)
self._resizer = Resizer()
self._random_seed = cfg.RNG_SEED
self._mirror = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF
self._classes = kwargs.get('classes', ('__background__',))
self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._queues = []
self.Q_in = self.Q_out = None
self.daemon = True
def make_roidb(self, ann_datum, flip=False):
annotations = ann_datum.annotation
n_objects = 0
if not self._use_diff:
for ann in annotations:
if not ann.difficult: n_objects += 1
else: n_objects = len(annotations)
roidb = {
'width': ann_datum.datum.width,
'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32),
'boxes': np.zeros((n_objects, 4), dtype=np.float32),
'normalized_boxes': np.zeros((n_objects, 4), dtype=np.float32),
}
ix = 0
for ann in annotations:
if not self._use_diff and ann.difficult: continue
roidb['boxes'][ix, :] = [
max(0, ann.x1), max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1)]
roidb['gt_classes'][ix] = self._class_to_ind[ann.name]
ix += 1
if flip: roidb['boxes'] = _flip_boxes(roidb['boxes'], roidb['width'])
roidb['normalized_boxes'][:, 0::2] = roidb['boxes'][:, 0::2] / float(roidb['width'])
roidb['normalized_boxes'][:, 1::2] = roidb['boxes'][:, 1::2] / float(roidb['height'])
return roidb
def get(self, serialized):
ann_datum = pb.AnnotatedDatum()
ann_datum.ParseFromString(serialized)
im_datum = ann_datum.datum
im = np.fromstring(im_datum.data, np.uint8)
        if im_datum.encoded: im = cv2.imdecode(im, -1)
else: im = im.reshape((im_datum.height, im_datum.width, im_datum.channels))
# Flip
flip = False
if self._mirror:
if npr.randint(0, 2) > 0:
im = im[:, ::-1, :]
flip = True
# Datum -> RoIDB
roidb = self.make_roidb(ann_datum, flip)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(roidb['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, 0:4] = roidb['normalized_boxes']
gt_boxes[:, 4] = roidb['gt_classes']
# Distort => Expand => Sample => Resize
im = self._distorter.distort_image(im)
im, gt_boxes = self._expander.expand_image(im, gt_boxes)
im, gt_boxes = self._sampler.sample_image(im, gt_boxes)
im = self._resizer.resize_image(im)
# Modify gt boxes to the blob scale
gt_boxes[:, 0] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 1] *= cfg.SSD.RESIZE.HEIGHT
gt_boxes[:, 2] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 3] *= cfg.SSD.RESIZE.HEIGHT
return im, gt_boxes
def run(self):
npr.seed(self._random_seed)
while True:
serialized = self.Q_in.get()
im, gt_boxes = self.get(serialized)
if len(gt_boxes) < 1: continue
self.Q_out.put((im, gt_boxes))
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
oldx1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - oldx2 - 1
flip_boxes[:, 2] = width - oldx1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
        logger.fatal('Encountered invalid coordinates after flipping boxes.')
return flip_boxes
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from .distort import Distortor
from .expand import Expander
from .sample import Sampler
from .resize import Resizer
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import PIL.Image
import PIL.ImageEnhance
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
class Distortor(object):
def __init__(self):
self._brightness_prob = cfg.SSD.DISTORT.BRIGHTNESS_PROB
self._brightness_delta = 0.3
self._contrast_prob = cfg.SSD.DISTORT.CONTRAST_PROB
self._contrast_delta = 0.3
self._saturation_prob = cfg.SSD.DISTORT.SATURATION_PROB
self._saturation_delta = 0.3
def distort_image(self, im):
im = PIL.Image.fromarray(im)
if npr.uniform() < self._brightness_prob:
delta_brightness = npr.uniform(-self._brightness_delta, self._brightness_delta) + 1.0
im = PIL.ImageEnhance.Brightness(im)
im = im.enhance(delta_brightness)
if npr.uniform() < self._contrast_prob:
delta_contrast = npr.uniform(-self._contrast_delta, self._contrast_delta) + 1.0
im = PIL.ImageEnhance.Contrast(im)
im = im.enhance(delta_contrast)
if npr.uniform() < self._saturation_prob:
            delta_saturation = npr.uniform(-self._saturation_delta, self._saturation_delta) + 1.0
im = PIL.ImageEnhance.Color(im)
im = im.enhance(delta_saturation)
im = np.array(im)
return im
if __name__ == '__main__':
distortor = Distortor()
while True:
im = cv2.imread('cat.jpg')
im = distortor.distort_image(im)
cv2.imshow('Distort', im)
cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy.random as npr
import numpy as np
import math
from lib.core.config import cfg
import lib.utils.logger as logger
class Expander(object):
def __init__(self, **params):
self._expand_prob = cfg.SSD.EXPAND.PROB
self._max_expand_ratio = cfg.SSD.EXPAND.MAX_RATIO
if self._max_expand_ratio < 1.0:
            logger.fatal('The max expand ratio must be >= 1.0, got {}'.format(self._max_expand_ratio))
def expand_image(self, im, gt_boxes=None):
prob = npr.uniform()
        if prob > self._expand_prob: return im, gt_boxes
ratio = npr.uniform(1.0, self._max_expand_ratio)
if ratio == 1: return im, gt_boxes
im_h = im.shape[0]
im_w = im.shape[1]
expand_h = int(im_h * ratio)
expand_w = int(im_w * ratio)
h_off = int(math.floor(npr.uniform(0.0, expand_h - im_h)))
w_off = int(math.floor(npr.uniform(0.0, expand_w - im_w)))
new_im = np.empty((expand_h, expand_w, 3), dtype=np.uint8)
new_im.fill(127)
new_im[h_off: h_off + im_h, w_off: w_off + im_w, :] = im
if gt_boxes is not None:
ex_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
ex_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w + w_off) / expand_w
ex_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h + h_off) / expand_h
ex_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w + w_off) / expand_w
ex_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h + h_off) / expand_h
return new_im, ex_gt_boxes
return new_im, gt_boxes
if __name__ == '__main__':
expander = Expander()
while True:
im = cv2.imread('cat.jpg')
gt_boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
im, gt_boxes = expander.expand_image(im, gt_boxes)
x1 = int(gt_boxes[0][0] * im.shape[1])
y1 = int(gt_boxes[0][1] * im.shape[0])
x2 = int(gt_boxes[0][2] * im.shape[1])
y2 = int(gt_boxes[0][3] * im.shape[0])
cv2.rectangle(im, (x1, y1), (x2, y2), (188,119,64), 2)
cv2.imshow('Expand', im)
cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import PIL.Image
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
class Resizer(object):
def __init__(self):
self._re_height = cfg.SSD.RESIZE.HEIGHT
self._re_width = cfg.SSD.RESIZE.WIDTH
interp_list = {
'LINEAR': PIL.Image.BILINEAR,
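            # Note: PIL provides no AREA filter; BILINEAR stands in for it.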
'AREA': PIL.Image.BILINEAR,
'NEAREST': PIL.Image.NEAREST,
'CUBIC': PIL.Image.CUBIC,
'LANCZOS4': PIL.Image.LANCZOS,
}
interp_mode = cfg.SSD.RESIZE.INTERP_MODE
self._interp_mode = [interp_list[key] for key in interp_mode]
def resize_image(self, im):
rand = npr.randint(0, len(self._interp_mode))
im = PIL.Image.fromarray(im)
im = im.resize((self._re_width, self._re_height), self._interp_mode[rand])
return np.array(im)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
from lib.utils.bbox_transform import clip_boxes
from lib.utils.boxes import iou
import lib.utils.logger as logger
class Sampler(object):
def __init__(self, samplers):
if not isinstance(samplers, list): samplers = [samplers]
self._samplers = []
for sampler in samplers:
if len(sampler) != 8:
logger.fatal('The sample params should be a tuple of length 8.')
sample_param = {
'min_scale': sampler[0],
'max_scale': sampler[1],
'min_aspect_ratio': sampler[2],
'max_aspect_ratio': sampler[3],
'min_jaccard_overlap': sampler[4],
'max_jaccard_overlap': sampler[5],
'max_trials': sampler[6],
'max_sample': sampler[7]}
self._samplers.append(sample_param)
def _compute_overlaps(self, rand_box, gt_boxes):
return iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])
def _generate_sample(self, sample_param):
min_scale = sample_param.get('min_scale', 1.0)
max_scale = sample_param.get('max_scale', 1.0)
scale = npr.uniform(min_scale, max_scale)
min_aspect_ratio = sample_param.get('min_aspect_ratio', 1.0)
max_aspect_ratio = sample_param.get('max_aspect_ratio', 1.0)
min_aspect_ratio = max(min_aspect_ratio, scale**2)
max_aspect_ratio = min(max_aspect_ratio, 1.0 / (scale**2))
aspect_ratio = npr.uniform(min_aspect_ratio, max_aspect_ratio)
bbox_w = scale * (aspect_ratio ** 0.5)
bbox_h = scale / (aspect_ratio ** 0.5)
w_off = npr.uniform(0.0, float(1 - bbox_w))
h_off = npr.uniform(0.0, float(1 - bbox_h))
return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])
def _check_satisfy(self, sample_box, gt_boxes, constraint):
min_jaccard_overlap = constraint.get('min_jaccard_overlap', None)
max_jaccard_overlap = constraint.get('max_jaccard_overlap', None)
        if min_jaccard_overlap is None and \
           max_jaccard_overlap is None:
return True
max_overlap = self._compute_overlaps(sample_box, gt_boxes).max()
if min_jaccard_overlap is not None:
if max_overlap < min_jaccard_overlap: return False
if max_jaccard_overlap is not None:
if max_overlap > max_jaccard_overlap: return False
return True
def _generate_batch_samples(self, gt_boxes):
sample_boxes = []
for sampler in self._samplers:
found = 0
for i in range(sampler['max_trials']):
if found >= sampler['max_sample']: break
sample_box = self._generate_sample(sampler)
if sampler['min_jaccard_overlap'] != 0.0 or \
sampler['max_jaccard_overlap'] != 1.0:
ok = self._check_satisfy(sample_box, gt_boxes, sampler)
if not ok: continue
found += 1
sample_boxes.append(sample_box)
return sample_boxes
def _rand_crop(self, im, rand_box, gt_boxes=None):
im_h = im.shape[0]
im_w = im.shape[1]
w_off = int(rand_box[0] * im_w)
h_off = int(rand_box[1] * im_h)
crop_w = int((rand_box[2] - rand_box[0]) * im_w)
crop_h = int((rand_box[3] - rand_box[1]) * im_h)
new_im = im[h_off: h_off + crop_h, w_off: w_off + crop_w, :]
if gt_boxes is not None:
ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
# Keep the ground-truth box whose center is in the sample box
# Implement ``EmitConstraint.CENTER`` in the original SSD
keep_inds = np.where((ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2])
& (ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
gt_boxes = gt_boxes[keep_inds]
new_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
new_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w - w_off)
new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off)
new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off)
new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off)
new_gt_boxes = clip_boxes(new_gt_boxes, (crop_h, crop_w))
new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w
new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h
new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w
new_gt_boxes[:, 3] = new_gt_boxes[:, 3] / crop_h
return new_im, new_gt_boxes
return new_im, gt_boxes
def sample_image(self, im, gt_boxes):
sample_boxes = self._generate_batch_samples(gt_boxes)
if len(sample_boxes) > 0:
            # Apply sampling if at least one valid sample box was found,
            # then randomly pick one
sample_idx = npr.randint(0, len(sample_boxes))
rand_box = sample_boxes[sample_idx]
im, gt_boxes = self._rand_crop(im, rand_box, gt_boxes)
return im, gt_boxes
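# A worked example (hypothetical numbers) for _generate_sample above: with
# scale = 0.5 and aspect_ratio = 2.0, the sampled box covers
# bbox_w * bbox_h = scale**2 = 0.25 of the image at a 2:1 shape,
#
#   bbox_w = 0.5 * sqrt(2.0) ~= 0.707
#   bbox_h = 0.5 / sqrt(2.0) ~= 0.354
#
# and clipping the aspect ratio to [scale**2, 1.0 / scale**2] keeps both
# sides within (0, 1], so the offsets drawn from U(0, 1 - side) are valid.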
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
import numpy.random as npr
npr.seed(3)
import sys
sys.path.append('../../')
from resize import Resizer
from expand import Expander
from distort import Distortor
from sample import Sampler
from lib.core.config import cfg
if __name__ == '__main__':
distorter = Distortor()
expander = Expander()
sampler = Sampler(cfg.SSD.SAMPLERS)
resizer = Resizer()
while True:
im = cv2.imread('cat.jpg')
gt_boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
im = distorter.distort_image(im)
im, gt_boxes = expander.expand_image(im, gt_boxes)
im, gt_boxes = sampler.sample_image(im, gt_boxes)
if len(gt_boxes) < 1: continue
im = resizer.resize_image(im)
for gt_box in gt_boxes:
x1 = int(gt_box[0] * im.shape[1])
y1 = int(gt_box[1] * im.shape[0])
x2 = int(gt_box[2] * im.shape[1])
y2 = int(gt_box[3] * im.shape[0])
cv2.rectangle(im, (x1, y1), (x2, y2), (188, 119, 64), 2)
print(x1, y1, x2, y2)
cv2.imshow('Sample', im)
cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def generate_anchors(min_sizes, max_sizes, ratios):
"""Generate anchor (reference) windows by enumerating
aspect ratios, min_sizes, max_sizes wrt a reference ctr (x, y, w, h).
"""
total_anchors = []
for idx, min_size in enumerate(min_sizes):
        # Note that SSD assumes a center-anchor, i.e. (x_ctr, y_ctr, w, h)
base_anchor = np.array([0, 0, min_size, min_size])
anchors = _ratio_enum(base_anchor, ratios)
if len(max_sizes) > 0:
max_size = max_sizes[idx]
_anchors = anchors[0].reshape((1, 4))
_anchors = np.vstack([_anchors, _max_size_enum(
base_anchor, min_size, max_size)])
anchors = np.vstack([_anchors, anchors[1:]])
total_anchors.append(anchors)
    return np.vstack(total_anchors)
def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window).
Note that it is a little different from Faster-RCNN.
"""
w = anchor[2]; h = anchor[3]
x_ctr = anchor[0]; y_ctr = anchor[1]
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws),
y_ctr - 0.5 * (hs),
x_ctr + 0.5 * (ws),
y_ctr + 0.5 * (hs)))
return anchors
def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor.
"""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
hs = np.round(np.sqrt(size_ratios))
ws = np.round(hs * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _max_size_enum(base_anchor, min_size, max_size):
    """Enumerate an anchor for max_size wrt the base anchor."""
w, h, x_ctr, y_ctr = _whctrs(base_anchor)
ws = hs = np.sqrt([min_size * max_size])
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
if __name__ == '__main__':
print(generate_anchors(min_sizes=[30], max_sizes=[60], ratios=[1, 0.5, 2]))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.datasets.factory import get_imdb
from lib.core.config import cfg
from lib.ssd.data.data_batch import DataBatch
class DataLayer(torch.nn.Module):
def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'source': database.source,
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'multiple_nodes': True,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
def forward(self):
# Get a mini-batch from the Queue
blobs = self.data_batch.get()
# Zero-Copy from numpy
blobs['data'] = torch.from_numpy(blobs['data'])
# Switch the data to Device
blobs['data'].cuda(cfg.GPU_ID)
return blobs
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
class HardMiningLayer(torch.nn.Module):
def __init__(self):
super(HardMiningLayer, self).__init__()
def forward(self, conf_prob, match_labels, max_overlaps):
# Confidence of each matched box
conf_prob_wide = conf_prob.numpy(True)
# Label of each matched box
match_labels_wide = match_labels
# Max overlaps between default boxes and gt boxes
max_overlaps_wide = max_overlaps
# label ``-1`` will be ignored
labels_wide = -np.ones(match_labels_wide.shape, dtype=np.int64)
for ix in range(match_labels_wide.shape[0]):
match_labels = match_labels_wide[ix]
max_overlaps = max_overlaps_wide[ix]
conf_prob = conf_prob_wide[ix]
conf_loss = np.zeros(match_labels.shape, dtype=np.float32)
inds = np.where(match_labels >= 0)[0]
flt_min = np.finfo(float).eps
# Naive softmax cross-entropy
conf_loss[inds] = -1.0 * np.log(np.maximum(
conf_prob[inds, match_labels[inds]], flt_min))
# Filter negatives
fg_inds = np.where(match_labels > 0)[0]
neg_inds = np.where(match_labels == 0)[0]
neg_overlaps = max_overlaps[neg_inds]
eligible_neg_inds = np.where(neg_overlaps < cfg.SSD.OHEM.NEG_OVERLAP)[0]
sel_inds = neg_inds[eligible_neg_inds]
# Do Mining
sel_loss = conf_loss[sel_inds]
num_pos = len(fg_inds)
num_sel = min(int(num_pos * cfg.SSD.OHEM.NEG_POS_RATIO), len(sel_inds))
sorted_sel_inds = sel_inds[np.argsort(-sel_loss)]
bg_inds = sorted_sel_inds[:num_sel]
labels_wide[ix][fg_inds] = match_labels[fg_inds] # Keep fg indices
labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices
# Feed labels to compute cls loss
return {'labels': to_tensor(labels_wide)}
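# A worked example (hypothetical numbers) of the mining above: with 2
# positives, NEG_POS_RATIO = 1 and eligible negative losses
# [0.9, 0.1, 0.5, 0.7], num_sel = min(2 * 1, 4) = 2, so only the two
# hardest negatives (losses 0.9 and 0.7) are labeled as background; the
# rest keep label -1 and are ignored by the classification loss.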
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.cython_bbox import bbox_overlaps
class MultiBoxMatchLayer(torch.nn.Module):
def __init__(self):
super(MultiBoxMatchLayer, self).__init__()
def forward(self, prior_boxes, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
num_priors = len(prior_boxes)
# Do matching between prior boxes and gt boxes
match_inds_wide = -np.ones((num_images, num_priors), dtype=np.int32)
match_labels_wide = np.zeros(match_inds_wide.shape, dtype=np.int64)
max_overlaps_wide = np.zeros(match_inds_wide.shape, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
if gt_boxes.shape[0] == 0: continue
# Compute the overlaps between prior boxes and gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(prior_boxes, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float))
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_priors), argmax_overlaps]
max_overlaps_wide[ix] = max_overlaps
# Bipartite matching & assignments
bipartite_inds = overlaps.argmax(axis=0)
class_assignment = gt_boxes[:, 4]
match_inds_wide[ix][bipartite_inds] = np.arange(
gt_boxes.shape[0], dtype=np.int32)
match_labels_wide[ix][bipartite_inds] = class_assignment
# Per prediction matching & assignments
            # Note that SSD matches each prior box only once.
            # We simply implement it by clobbering the assignments
            # matched in the bipartite pass.
per_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
gt_assignment = argmax_overlaps[per_inds]
class_assignment = gt_boxes[gt_assignment, 4]
match_inds_wide[ix][per_inds] = gt_assignment
match_labels_wide[ix][per_inds] = class_assignment
return {
'match_inds': match_inds_wide,
'match_labels': match_labels_wide,
'max_overlaps': max_overlaps_wide,
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
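# A minimal sketch (hypothetical overlaps) of the two-stage matching above.
# For a 3-prior x 2-gt overlap matrix
#
#   [[0.1, 0.6],
#    [0.8, 0.2],
#    [0.4, 0.3]]
#
# the bipartite pass (argmax over axis 0) matches prior 1 -> gt 0 and
# prior 0 -> gt 1, so every gt box gets at least one prior; the
# per-prediction pass then also matches prior 2 -> gt 0 whenever
# 0.4 >= cfg.TRAIN.FG_THRESH. A prior caught by both passes is simply
# re-assigned to its own argmax, so each prior ends with a single match.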
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform
class MultiBoxTargetLayer(torch.nn.Module):
def __init__(self):
super(MultiBoxTargetLayer, self).__init__()
def forward(self, match_inds, match_labels, prior_boxes, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# GT assignments between default boxes and gt boxes
match_inds_wide = match_inds
# Matched labels (After hard mining possibly)
match_labels_wide = match_labels
num_priors = len(prior_boxes)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
bbox_targets_wide = np.zeros((num_images, num_priors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros(bbox_targets_wide.shape, dtype=np.float32)
bbox_outside_weights_wide = np.zeros(bbox_targets_wide.shape, dtype=np.float32)
        # Number of matched boxes (#positives).
        # We divide it by the number of images, as SmoothL1Loss will also
        # divide by it.
n_pos = max(len(np.where(match_labels_wide > 0)[0]), 1)
bbox_normalization = n_pos / num_images
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
if gt_boxes.shape[0] == 0: continue
# Sample fg-rois(default boxes) & gt-rois(gt boxes)
match_inds = match_inds_wide[ix]
match_labels = match_labels_wide[ix]
ex_inds = np.where(match_labels > 0)[0]
ex_rois = prior_boxes[ex_inds]
gt_assignment = match_inds[ex_inds]
gt_rois = gt_boxes[gt_assignment]
# Assign targets & inside weights & outside weights
bbox_targets_wide[ix][ex_inds] = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_inside_weights_wide[ix, :] = (1.0, 1.0, 1.0, 1.0)
bbox_outside_weights_wide[ix][ex_inds] = 1.0 / bbox_normalization
return {
'bbox_targets': to_tensor(bbox_targets_wide),
'bbox_inside_weights': to_tensor(bbox_inside_weights_wide),
'bbox_outside_weights': to_tensor(bbox_outside_weights_wide),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
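A quick worked check of the outside-weight normalization above (editor's sketch): because SmoothL1Loss divides by the number of images, weighting each positive by num_images / n_pos makes the regression loss an average over positives.

```python
# Editor's sketch: 2 images, 8 matched (positive) priors in the batch.
num_images, n_pos = 2, 8
bbox_normalization = n_pos / num_images        # 4.0 (true division)
outside_weight = 1.0 / bbox_normalization      # 0.25 on each positive
# Loss ~ (1 / num_images) * sum(0.25 * l1_i) = (1 / n_pos) * sum(l1_i)
print(outside_weight)  # 0.25
```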
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.ssd.generate_anchors import generate_anchors
class PriorBoxLayer(torch.nn.Module):
"""Generate default boxes(anchors)."""
def __init__(self):
super(PriorBoxLayer, self).__init__()
min_sizes = cfg.SSD.MULTIBOX.MIN_SIZES
max_sizes = cfg.SSD.MULTIBOX.MAX_SIZES
if len(max_sizes) > 0:
if len(min_sizes) != len(max_sizes):
logger.fatal('Got {} min sizes and {} max sizes.'.format(
len(min_sizes), len(max_sizes)))
self.strides = cfg.SSD.MULTIBOX.STRIDES
aspect_ratios = cfg.SSD.MULTIBOX.ASPECT_RATIOS
self.num_anchors = len(min_sizes) * len(aspect_ratios) + len(max_sizes)
self.base_anchors = []
for i in range(len(min_sizes)):
self.base_anchors.append(
generate_anchors(
min_sizes[i] if isinstance(
min_sizes[i], (list, tuple)) else [min_sizes[i]],
max_sizes[i] if isinstance(
max_sizes[i], (list, tuple)) else [max_sizes[i]],
aspect_ratios[i],
)
)
def forward(self, features):
all_anchors = []
for i in range(len(self.strides)):
# 1. Generate base grids
height, width = features[i].shape[-2:]
shift_x = (np.arange(0, width) + 0.5) * self.strides[i]
shift_y = (np.arange(0, height) + 0.5) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# 2. Apply anchors on base grids
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0] # K = map_h * map_w
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
anchors = anchors.reshape((K * A, 4)).astype(np.float32)
all_anchors.append(anchors)
return np.concatenate(all_anchors, axis=0)
\ No newline at end of file
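The (1, A, 4) + (K, 1, 4) broadcast above replicates A base anchors over K grid cells in one vectorized add; a minimal sketch (editor's illustration):

```python
import numpy as np

A, K = 2, 3  # 2 base anchors, 3 grid cells
base_anchors = np.array([[-8., -8., 8., 8.],
                         [-16., -16., 16., 16.]])  # (A, 4)
shifts = np.array([[8., 8., 8., 8.],
                   [24., 8., 24., 8.],
                   [8., 24., 8., 24.]])            # (K, 4)
anchors = (base_anchors.reshape((1, A, 4)) +
           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))  # (K, A, 4)
print(anchors.reshape((K * A, 4)).shape)  # (6, 4)
```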
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
    import cPickle
except ImportError:
    import pickle as cPickle
import cv2
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.bbox_transform import clip_boxes, bbox_transform_inv
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer
from lib.utils.blob import to_array
from lib.utils.vis import vis_one_image
def get_images(ims):
target_h = cfg.SSD.RESIZE.HEIGHT
target_w = cfg.SSD.RESIZE.WIDTH
    processed_ims, im_scales = [], []
for im in ims:
im_scales.append((float(target_h) / im.shape[0],
float(target_w) / im.shape[1]))
processed_ims.append(cv2.resize(im, (target_w, target_h)))
ims_blob = np.array(processed_ims, dtype=np.uint8)
return ims_blob, im_scales
def ims_detect(detector, ims):
"""Detect images, with the single scale."""
# Prepare blobs
data, im_scales = get_images(ims)
data = torch.from_numpy(data).cuda(cfg.GPU_ID)
    # Do Forward
with torch.no_grad():
outputs = detector.forward(inputs={'data': data})
# Decode results
scores = to_array(outputs['cls_prob'])
prior_boxes = to_array(outputs['prior_boxes'])
box_deltas = to_array(outputs['bbox_pred'])
batch_boxes = []
for ix in range(box_deltas.shape[0]):
boxes = bbox_transform_inv(prior_boxes, box_deltas[ix], cfg.BBOX_REG_WEIGHTS)
boxes[:, 0::2] /= im_scales[ix][1]
boxes[:, 1::2] /= im_scales[ix][0]
batch_boxes.append(clip_boxes(boxes, ims[ix].shape))
return scores, batch_boxes
def test_net(net, server):
classes, num_images, num_classes = \
server.classes, server.num_images, server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue
image_id, raw_image = server.get_image()
image_ids.append(image_id)
raw_images.append(raw_image)
_t['im_detect'].tic()
batch_scores, batch_boxes = ims_detect(net, raw_images)
_t['im_detect'].toc()
_t['misc'].tic()
for item_idx in range(len(batch_scores)):
i = batch_idx + item_idx
scores = batch_scores[item_idx]
boxes = batch_boxes[item_idx]
boxes_this_image = [[]]
for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds]
pre_nms_inds = np.argsort(-cls_scores)[0 : cfg.TEST.NMS_TOP_K]
cls_scores = cls_scores[pre_nms_inds]
cls_boxes = cls_boxes[pre_nms_inds]
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
cls_dets, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA)
else:
keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
cls_dets = cls_dets[keep, :]
all_boxes[j][i] = cls_dets
boxes_this_image.append(cls_dets)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_images[item_idx], classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_ids[item_idx]))
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
.format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections')
server.evaluate_detections(all_boxes)
\ No newline at end of file
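The DETECTIONS_PER_IM cap in test_net keeps the top-scoring boxes across all classes via a single sort; a small sketch (editor's illustration):

```python
import numpy as np

max_dets = 3
image_scores = np.array([0.9, 0.8, 0.75, 0.6, 0.4])  # all classes pooled
image_thresh = np.sort(image_scores)[-max_dets]      # 0.75
print(image_scores[image_scores >= image_thresh])    # [0.9  0.8  0.75]
```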
File mode changed
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""A simple attribute dictionary used for representing configuration options."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
class AttrDict(dict):
IMMUTABLE = '__immutable__'
def __init__(self, *args, **kwargs):
super(AttrDict, self).__init__(*args, **kwargs)
self.__dict__[AttrDict.IMMUTABLE] = False
def __getattr__(self, name):
if name in self.__dict__:
return self.__dict__[name]
elif name in self:
return self[name]
else:
raise AttributeError(name)
def __setattr__(self, name, value):
if not self.__dict__[AttrDict.IMMUTABLE]:
if name in self.__dict__:
self.__dict__[name] = value
else:
self[name] = value
else:
raise AttributeError(
'Attempted to set "{}" to "{}", but AttrDict is immutable'.
format(name, value)
)
def immutable(self, is_immutable):
"""Set immutability to is_immutable and recursively apply the setting
to all nested AttrDicts.
"""
self.__dict__[AttrDict.IMMUTABLE] = is_immutable
# Recursively set immutable state
for v in self.__dict__.values():
if isinstance(v, AttrDict):
v.immutable(is_immutable)
for v in self.values():
if isinstance(v, AttrDict):
v.immutable(is_immutable)
def is_immutable(self):
return self.__dict__[AttrDict.IMMUTABLE]
\ No newline at end of file
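A short usage sketch of AttrDict (editor's illustration, assuming the class above is in scope): keys double as attributes, and immutable(True) freezes the whole tree.

```python
cfg = AttrDict(TRAIN=AttrDict(SCALES=(600,)), GPU_ID=0)
cfg.GPU_ID = 1                 # attribute and key access are interchangeable
assert cfg['GPU_ID'] == 1
cfg.immutable(True)            # recursively freezes nested AttrDicts
try:
    cfg.TRAIN.SCALES = (800,)  # now rejected
except AttributeError as e:
    print(e)
```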
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py>
#
# ------------------------------------------------------------
import numpy as np
from lib.core.config import cfg
def bbox_transform(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
wx, wy, ww, wh = weights
targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
targets_dw = ww * np.log(gt_widths / ex_widths)
targets_dh = wh * np.log(gt_heights / ex_heights)
targets = np.vstack(
(targets_dx, targets_dy,
targets_dw, targets_dh)).transpose()
return targets
def bbox_transform_inv(boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0)):
if boxes.shape[0] == 0:
return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
boxes = boxes.astype(deltas.dtype, copy=False)
widths = boxes[:, 2] - boxes[:, 0] + 1.0
heights = boxes[:, 3] - boxes[:, 1] + 1.0
ctr_x = boxes[:, 0] + 0.5 * widths
ctr_y = boxes[:, 1] + 0.5 * heights
wx, wy, ww, wh = weights
dx = deltas[:, 0::4] / wx
dy = deltas[:, 1::4] / wy
dw = deltas[:, 2::4] / ww
dh = deltas[:, 3::4] / wh
if cfg.USE_XFORM_CLIP:
dw = np.minimum(dw, cfg.BBOX_XFORM_CLIP)
dh = np.minimum(dh, cfg.BBOX_XFORM_CLIP)
pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
pred_w = np.exp(dw) * widths[:, np.newaxis]
pred_h = np.exp(dh) * heights[:, np.newaxis]
pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # x1
pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # y1
pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w # x2
pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h # y2
return pred_boxes
def clip_boxes(boxes, im_shape):
# x1 >= 0
boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
return boxes
\ No newline at end of file
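A worked example of the encoding above (editor's sketch, unit weights; note the +1 pixel width convention). Run from the repository root so lib is importable:

```python
import numpy as np
from lib.utils.bbox_transform import bbox_transform

ex = np.array([[0., 0., 10., 10.]])  # width 11, center x = 5.5
gt = np.array([[2., 2., 12., 12.]])  # width 11, center x = 7.5
# dx = dy = (7.5 - 5.5) / 11 = 2/11; dw = dh = log(11 / 11) = 0
print(bbox_transform(ex, gt))  # [[0.1818... 0.1818... 0. 0.]]
```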
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/blob.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.image import resize_image, distort_image
def im_list_to_blob(ims):
"""Convert a list of images into a network input.
Assume that images are not means subtracted, and with BGR order.
"""
max_shape = np.array([im.shape for im in ims]).max(axis=0)
if cfg.MODEL.COARSEST_STRIDE > 0:
stride = float(cfg.MODEL.COARSEST_STRIDE)
max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride)
max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride)
unify_shape = (len(ims), max_shape[0], max_shape[1], 3)
blob = np.empty(unify_shape, dtype=np.uint8)
blob[:] = cfg.PIXEL_MEANS
for idx, im in enumerate(ims):
blob[idx, 0:im.shape[0], 0:im.shape[1], :] = im
return blob
def mask_list_to_blob(masks):
"""Convert a list of masks into a network input."""
max_shape = np.array([mask.shape[1:] for mask in masks]).max(axis=0)
num_masks = np.array([mask.shape[0] for mask in masks]).sum()
blob = np.zeros((num_masks, max_shape[0], max_shape[1]), dtype=np.uint8)
pos = 0
for mask in masks:
blob[pos : pos + mask.shape[0],
0 : mask.shape[1], 0 : mask.shape[2]] = mask
pos += mask.shape[0]
return blob
def prep_im_for_blob(im, target_size, max_size):
"""Scale an image for use in a blob."""
im_shape, jitter = im.shape, 1.
if cfg.TRAIN.COLOR_JITTERING:
im = distort_image(im)
if max_size > 0:
# Scale image along the shortest side
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE
if np.round(im_scale * im_size_max) > max_size:
im_scale = float(max_size) / float(im_size_max)
else:
# Scale image along the longest side
im_size_max = np.max(im_shape[0:2])
im_scale = float(target_size) / float(im_size_max)
if cfg.TRAIN.SCALE_JITTERING:
r = cfg.TRAIN.SCALE_RANGE
jitter = r[0] + np.random.rand() * (r[1] - r[0])
im_scale *= jitter
return resize_image(im, im_scale, im_scale), im_scale, jitter
def to_tensor(blob, enforce_cpu=False):
if isinstance(blob, np.ndarray):
# Zero-Copy from numpy
cpu_tensor = torch.from_numpy(blob)
else:
cpu_tensor = blob
return cpu_tensor if enforce_cpu else \
cpu_tensor.cuda(cfg.GPU_ID)
def to_array(blob, copy=False):
if isinstance(blob, torch.Tensor):
# Zero-Copy from numpy
array = blob.numpy(True)
else:
array = blob
return array.copy() if copy else array
\ No newline at end of file
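im_list_to_blob pads the max batch shape up to a multiple of MODEL.COARSEST_STRIDE (e.g. 32 for an FPN backbone) so every level divides evenly; a quick check of the rounding (editor's sketch):

```python
import numpy as np

stride = 32.
max_shape = np.array([600, 800])
padded = (np.ceil(max_shape / stride) * stride).astype(int)
print(padded)  # [608 800]
```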
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/ppwwyyxx/tensorpack/blob/master/examples/FasterRCNN/utils/np_box_ops.py>
#
# ------------------------------------------------------------
import numpy as np
def area(boxes):
"""Computes area of boxes.
Args:
boxes: Numpy array with shape [N, 4] holding N boxes
Returns:
        a numpy array with shape [N] representing box areas
"""
return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
def intersection(boxes1, boxes2):
"""Compute pairwise intersection areas between boxes.
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes
boxes2: a numpy array with shape [M, 4] holding M boxes
Returns:
        a numpy array with shape [N, M] representing pairwise intersection area
"""
[y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
[y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)
all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
intersect_heights = np.maximum(
np.zeros(all_pairs_max_ymin.shape),
all_pairs_min_ymax - all_pairs_max_ymin)
all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
intersect_widths = np.maximum(
np.zeros(all_pairs_max_xmin.shape),
all_pairs_min_xmax - all_pairs_max_xmin)
return intersect_heights * intersect_widths
def iou(boxes1, boxes2):
"""Computes pairwise intersection-over-union between box collections.
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise iou scores.
"""
intersect = intersection(boxes1, boxes2)
area1 = area(boxes1)
area2 = area(boxes2)
union = np.expand_dims(area1, axis=1) + np.expand_dims(
area2, axis=0) - intersect
return intersect / union
def ioa1(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections.
Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
    their intersection area over box1's area. Note that ioa is not symmetric,
that is, IOA(box1, box2) != IOA(box2, box1).
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
    boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(area(boxes1), axis=1)
return intersect / areas
def ioa2(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections.
Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
their intersection area over box2's area. Note that ioa is not symmetric,
that is, IOA(box1, box2) != IOA(box2, box1).
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
    boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(area(boxes2), axis=0)
return intersect / areas
def expand_boxes(boxes, scale):
"""Expand an array of boxes by a given scale.
"""
w_half = (boxes[:, 2] - boxes[:, 0]) * .5
h_half = (boxes[:, 3] - boxes[:, 1]) * .5
x_c = (boxes[:, 2] + boxes[:, 0]) * .5
y_c = (boxes[:, 3] + boxes[:, 1]) * .5
w_half *= scale
h_half *= scale
boxes_exp = np.zeros(boxes.shape)
boxes_exp[:, 0] = x_c - w_half
boxes_exp[:, 2] = x_c + w_half
boxes_exp[:, 1] = y_c - h_half
boxes_exp[:, 3] = y_c + h_half
return boxes_exp
\ No newline at end of file
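A sanity check of iou above (editor's sketch; the module lives at lib.utils.boxes, as imported elsewhere in this commit):

```python
import numpy as np
from lib.utils.boxes import iou

b1 = np.array([[0., 0., 10., 10.]])
b2 = np.array([[5., 5., 15., 15.]])
# intersection 5 * 5 = 25; union 100 + 100 - 25 = 175
print(iou(b1, b2))  # [[0.14285714]]
```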
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Codes are based on:
#
# <https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/colormap.py>
#
##############################################################################
"""An awesome colormap for really neat visualizations."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
def colormap(rgb=False):
color_list = np.array(
[
0.000, 0.447, 0.741,
0.850, 0.325, 0.098,
0.929, 0.694, 0.125,
0.494, 0.184, 0.556,
0.466, 0.674, 0.188,
0.301, 0.745, 0.933,
0.635, 0.078, 0.184,
0.300, 0.300, 0.300,
0.600, 0.600, 0.600,
1.000, 0.000, 0.000,
1.000, 0.500, 0.000,
0.749, 0.749, 0.000,
0.000, 1.000, 0.000,
0.000, 0.000, 1.000,
0.667, 0.000, 1.000,
0.333, 0.333, 0.000,
0.333, 0.667, 0.000,
0.333, 1.000, 0.000,
0.667, 0.333, 0.000,
0.667, 0.667, 0.000,
0.667, 1.000, 0.000,
1.000, 0.333, 0.000,
1.000, 0.667, 0.000,
1.000, 1.000, 0.000,
0.000, 0.333, 0.500,
0.000, 0.667, 0.500,
0.000, 1.000, 0.500,
0.333, 0.000, 0.500,
0.333, 0.333, 0.500,
0.333, 0.667, 0.500,
0.333, 1.000, 0.500,
0.667, 0.000, 0.500,
0.667, 0.333, 0.500,
0.667, 0.667, 0.500,
0.667, 1.000, 0.500,
1.000, 0.000, 0.500,
1.000, 0.333, 0.500,
1.000, 0.667, 0.500,
1.000, 1.000, 0.500,
0.000, 0.333, 1.000,
0.000, 0.667, 1.000,
0.000, 1.000, 1.000,
0.333, 0.000, 1.000,
0.333, 0.333, 1.000,
0.333, 0.667, 1.000,
0.333, 1.000, 1.000,
0.667, 0.000, 1.000,
0.667, 0.333, 1.000,
0.667, 0.667, 1.000,
0.667, 1.000, 1.000,
1.000, 0.000, 1.000,
1.000, 0.333, 1.000,
1.000, 0.667, 1.000,
0.167, 0.000, 0.000,
0.333, 0.000, 0.000,
0.500, 0.000, 0.000,
0.667, 0.000, 0.000,
0.833, 0.000, 0.000,
1.000, 0.000, 0.000,
0.000, 0.167, 0.000,
0.000, 0.333, 0.000,
0.000, 0.500, 0.000,
0.000, 0.667, 0.000,
0.000, 0.833, 0.000,
0.000, 1.000, 0.000,
0.000, 0.000, 0.167,
0.000, 0.000, 0.333,
0.000, 0.000, 0.500,
0.000, 0.000, 0.667,
0.000, 0.000, 0.833,
0.000, 0.000, 1.000,
0.000, 0.000, 0.000,
0.143, 0.143, 0.143,
0.286, 0.286, 0.286,
0.429, 0.429, 0.429,
0.571, 0.571, 0.571,
0.714, 0.714, 0.714,
0.857, 0.857, 0.857,
1.000, 1.000, 1.000
]
).astype(np.float32)
color_list = color_list.reshape((-1, 3)) * 255
if not rgb:
color_list = color_list[:, ::-1]
return color_list
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import cv2
import numpy as np
import PIL.Image
import PIL.ImageEnhance
from lib.core.config import cfg
def resize_image(im, fx, fy):
im_shape = im.shape
im = PIL.Image.fromarray(im)
size = (int(np.ceil(im_shape[1] * fx)), int(np.ceil(im_shape[0] * fy)))
im = im.resize(size, PIL.Image.BILINEAR)
return np.array(im)
# Faster and more robust resizing than the OpenCV methods
def resize_mask(mask, size):
mask = PIL.Image.fromarray(mask)
return np.array(mask.resize(size, PIL.Image.NEAREST))
def distort_image(im):
im = PIL.Image.fromarray(im)
if np.random.uniform() < 0.5:
delta_brightness = np.random.uniform(-0.3, 0.3) + 1.0
im = PIL.ImageEnhance.Brightness(im)
im = im.enhance(delta_brightness)
if np.random.uniform() < 0.5:
delta_contrast = np.random.uniform(-0.3, 0.3) + 1.0
im = PIL.ImageEnhance.Contrast(im)
im = im.enhance(delta_contrast)
if np.random.uniform() < 0.3:
delta_saturation = np.random.uniform(-0.3, 0.3) + 1.0
im = PIL.ImageEnhance.Color(im)
im = im.enhance(delta_saturation)
return np.array(im)
def scale_image(im):
processed_ims, ims_scales = [], []
if cfg.TEST.MAX_SIZE > 0:
im_size_min = np.min(im.shape[0:2])
im_size_max = np.max(im.shape[0:2])
for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE
if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
processed_ims.append(
cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR))
ims_scales.append(im_scale)
else:
# Scale image along the longest side
im_size_max = np.max(im.shape[0:2])
for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_max)
processed_ims.append(
cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR))
ims_scales.append(im_scale)
return processed_ims, ims_scales
\ No newline at end of file
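A worked example of the scale selection in scale_image (editor's sketch): the shortest side is scaled to target_size unless that would push the longest side past MAX_SIZE.

```python
import numpy as np

target_size, max_size = 600, 1000
h, w = 480, 640
im_scale = float(target_size) / min(h, w)      # 600 / 480 = 1.25
if np.round(im_scale * max(h, w)) > max_size:  # 640 * 1.25 = 800 <= 1000
    im_scale = float(max_size) / max(h, w)
print(im_scale)  # 1.25
```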
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/platform/tf_logging.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import inspect
import sys as _sys
import logging as _logging
import threading
from logging import DEBUG, ERROR, FATAL, INFO, WARN
_logger = None
_is_root = True
_logger_lock = threading.Lock()
def get_logger():
global _logger
# Use double-checked locking to avoid taking lock unnecessarily.
if _logger:
return _logger
_logger_lock.acquire()
try:
if _logger:
return _logger
logger = _logging.getLogger('detectron')
logger.setLevel(INFO)
logger.propagate = False
        # Determine whether we are in an interactive environment
        _interactive = False
        try:
            # This is only defined in interactive shells.
            if _sys.ps1: _interactive = True
        except AttributeError:
            # Even now, we may be in an interactive shell with `python -i`.
            _interactive = _sys.flags.interactive
        # If we are in an interactive environment (like Jupyter), set loglevel
        # to INFO and pipe the output to stdout.
        if _interactive:
            logger.setLevel(INFO)
            _logging_target = _sys.stdout
        else:
            _logging_target = _sys.stderr
        # Add the output handler.
        _handler = _logging.StreamHandler(_logging_target)
        _handler.setFormatter(_logging.Formatter('%(levelname)s %(message)s'))
        logger.addHandler(_handler)
_logger = logger
return _logger
finally:
_logger_lock.release()
def _detailed_msg(msg):
file, lineno = inspect.stack()[:3][2][1:3]
return "{}:{}] {}".format(os.path.split(file)[-1], lineno, msg)
def log(level, msg, *args, **kwargs):
get_logger().log(level, _detailed_msg(msg), *args, **kwargs)
def debug(msg, *args, **kwargs):
if is_root(): get_logger().debug(_detailed_msg(msg), *args, **kwargs)
def error(msg, *args, **kwargs):
    get_logger().error(_detailed_msg(msg), *args, **kwargs)
    assert 0  # Abort the process after logging
def fatal(msg, *args, **kwargs):
    get_logger().fatal(_detailed_msg(msg), *args, **kwargs)
    assert 0  # Abort the process after logging
def info(msg, *args, **kwargs):
if is_root(): get_logger().info(_detailed_msg(msg), *args, **kwargs)
def warn(msg, *args, **kwargs):
if is_root(): get_logger().warn(_detailed_msg(msg), *args, **kwargs)
def warning(msg, *args, **kwargs):
if is_root(): get_logger().warning(_detailed_msg(msg), *args, **kwargs)
def get_verbosity():
"""Return how much logging output will be produced."""
return get_logger().getEffectiveLevel()
def set_verbosity(v):
"""Sets the threshold for what messages will be logged."""
get_logger().setLevel(v)
def set_root_logger(is_root=True):
global _is_root
_is_root = is_root
def is_root():
return _is_root
_level_names = {
FATAL: 'FATAL',
ERROR: 'ERROR',
WARN: 'WARN',
INFO: 'INFO',
DEBUG: 'DEBUG',
}
\ No newline at end of file
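A brief usage sketch of the logger module (editor's illustration; run from the repository root so lib is importable):

```python
from lib.utils import logger

logger.set_root_logger(True)    # only the root rank emits info/warn/debug
logger.info('loading dataset')  # INFO <file>:<line>] loading dataset
logger.set_verbosity(logger.DEBUG)
```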
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/msracver/FCIS/blob/master/lib/mask/mask_transform.py>
#
# ------------------------------------------------------------
import numpy as np
def intersect_box_mask(ex_box, gt_box, gt_mask):
x1 = max(ex_box[0], gt_box[0])
y1 = max(ex_box[1], gt_box[1])
x2 = min(ex_box[2], gt_box[2])
y2 = min(ex_box[3], gt_box[3])
if x1 > x2 or y1 > y2: return None
w = x2 - x1 + 1
h = y2 - y1 + 1
ex_starty = y1 - ex_box[1]
ex_startx = x1 - ex_box[0]
inter_maskb = gt_mask[y1 : y2 + 1 , x1 : x2 + 1]
regression_target = np.zeros((ex_box[3] - ex_box[1] + 1, ex_box[2] - ex_box[0] + 1), dtype=np.uint8)
regression_target[ex_starty: ex_starty + h, ex_startx: ex_startx + w] = inter_maskb
return regression_target
def mask_overlap(box1, box2, mask1, mask2):
x1 = max(box1[0], box2[0])
y1 = max(box1[1], box2[1])
x2 = min(box1[2], box2[2])
y2 = min(box1[3], box2[3])
if x1 > x2 or y1 > y2: return 0
w = x2 - x1 + 1
h = y2 - y1 + 1
# Get masks in the intersection part
start_ya = y1 - box1[1]
start_xa = x1 - box1[0]
inter_maska = mask1[start_ya: start_ya + h, start_xa:start_xa + w]
start_yb = y1 - box2[1]
start_xb = x1 - box2[0]
inter_maskb = mask2[start_yb: start_yb + h, start_xb:start_xb + w]
assert inter_maska.shape == inter_maskb.shape, (inter_maska.shape, inter_maskb.shape)
inter = np.logical_and(inter_maskb, inter_maska).sum()
union = mask1.sum() + mask2.sum() - inter
if union < 1.0: return 0
return float(inter) / float(union)
\ No newline at end of file
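A worked example of mask_overlap (editor's sketch, assuming the function above is in scope): two 4x4 fully-foreground masks whose boxes overlap in a 2x2 region.

```python
import numpy as np

box1, box2 = [0, 0, 3, 3], [2, 2, 5, 5]
mask1 = np.ones((4, 4), dtype=np.uint8)
mask2 = np.ones((4, 4), dtype=np.uint8)
# intersection 2 * 2 = 4; union 16 + 16 - 4 = 28
print(mask_overlap(box1, box2, mask1, mask2))  # 0.14285714285714285
```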
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/facebookresearch/Detectron/blob/master/lib/utils/logging.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import deque
import numpy as np
class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a
window or the global series average.
"""
def __init__(self, window_size):
self.deque = deque(maxlen=window_size)
self.series = []
self.total = 0.0
self.count = 0
def AddValue(self, value):
self.deque.append(value)
self.series.append(value)
self.count += 1
self.total += value
def GetMedianValue(self):
return np.median(self.deque)
def GetAverageValue(self):
return np.mean(self.deque)
def GetGlobalAverageValue(self):
return self.total / self.count
\ No newline at end of file
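A usage sketch of SmoothedValue (editor's illustration, assuming the class above is in scope): with a window of 2, the median and window average track only the most recent values.

```python
sv = SmoothedValue(window_size=2)
for v in (1.0, 2.0, 4.0):
    sv.AddValue(v)
print(sv.GetMedianValue())         # 3.0 (median of deque [2.0, 4.0])
print(sv.GetAverageValue())        # 3.0
print(sv.GetGlobalAverageValue())  # 2.333... (7.0 / 3 over the series)
```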
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/timer.py>
#
# ------------------------------------------------------------
import time
class Timer(object):
"""A simple timer."""
def __init__(self):
self.total_time = 0.
self.calls = 0
self.start_time = 0.
self.diff = 0.
self.average_time = 0.
def tic(self):
        # Using time.time instead of time.clock because time.clock
        # does not normalize for multithreading
self.start_time = time.time()
def toc(self, average=True):
self.diff = time.time() - self.start_time
self.total_time += self.diff
self.calls += 1
self.average_time = self.total_time / self.calls
if average:
return self.average_time
else:
return self.diff
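A usage sketch of Timer (editor's illustration, assuming the class above is in scope): toc() returns the running average by default, or the last interval with average=False.

```python
import time

t = Timer()
for _ in range(3):
    t.tic()
    time.sleep(0.01)
    t.toc()
print('%.3fs average over %d calls' % (t.average_time, t.calls))
```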
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Codes are based on:
#
# <https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/vis.py>
#
##############################################################################
"""Detection output visualization module."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import cv2
import numpy as np
from lib.utils.colormap import colormap
from lib.utils.boxes import expand_boxes
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
plt.rcParams['pdf.fonttype'] = 42 # For editing in Adobe Illustrator
_GRAY = (218, 227, 218)
_GREEN = (18, 127, 15)
_WHITE = (255, 255, 255)
def kp_connections(keypoints):
kp_lines = [
[keypoints.index('left_eye'), keypoints.index('right_eye')],
[keypoints.index('left_eye'), keypoints.index('nose')],
[keypoints.index('right_eye'), keypoints.index('nose')],
[keypoints.index('right_eye'), keypoints.index('right_ear')],
[keypoints.index('left_eye'), keypoints.index('left_ear')],
[keypoints.index('right_shoulder'), keypoints.index('right_elbow')],
[keypoints.index('right_elbow'), keypoints.index('right_wrist')],
[keypoints.index('left_shoulder'), keypoints.index('left_elbow')],
[keypoints.index('left_elbow'), keypoints.index('left_wrist')],
[keypoints.index('right_hip'), keypoints.index('right_knee')],
[keypoints.index('right_knee'), keypoints.index('right_ankle')],
[keypoints.index('left_hip'), keypoints.index('left_knee')],
[keypoints.index('left_knee'), keypoints.index('left_ankle')],
[keypoints.index('right_shoulder'), keypoints.index('left_shoulder')],
[keypoints.index('right_hip'), keypoints.index('left_hip')],
]
return kp_lines
def convert_from_cls_format(cls_boxes, cls_segms, cls_keyps):
"""
Convert from the class boxes/segms/keyps format generated by the testing code.
"""
box_list = [b for b in cls_boxes if len(b) > 0]
if len(box_list) > 0: boxes = np.concatenate(box_list)
else: boxes = None
if cls_segms is not None: segms = [s for slist in cls_segms for s in slist]
else: segms = None
if cls_keyps is not None: keyps = [k for klist in cls_keyps for k in klist]
else: keyps = None
classes = []
for j in range(len(cls_boxes)):
classes += [j] * len(cls_boxes[j])
return boxes, segms, keyps, classes
def convert_from_cls_format_v2(cls_boxes, cls_segms, cls_keyps, class_names):
"""
Convert from the class boxes/segms/keyps format generated by the testing code.
"""
box_list, segm_list = [], []
for j, name in enumerate(class_names):
if name == '__background__': continue
if len(cls_boxes[j]) > 0:
box_list.append(cls_boxes[j])
if cls_segms is not None: segm_list.append(cls_segms[j])
if len(box_list) > 0: boxes = np.concatenate(box_list)
else: boxes = None
if len(segm_list) > 0: segms = np.concatenate(segm_list)
else: segms = None
if cls_keyps is not None: keyps = [k for klist in cls_keyps for k in klist]
else: keyps = None
classes = []
for j in range(len(cls_boxes)):
classes += [j] * len(cls_boxes[j])
return boxes, segms, keyps, classes
def get_class_string(class_name, score):
return class_name + ' {:0.2f}'.format(score).lstrip('0')
def get_mask(boxes, segms, im_shape, mask_thresh=0.4):
i, masks = 0, np.zeros(list(im_shape) + [len(boxes)], dtype=np.uint8)
for det, msk in zip(boxes, segms):
M = msk.shape[0]
scale = (M + 2.0) / M
ref_box = expand_boxes(np.array([det[0:4]]), scale)[0]
ref_box = ref_box.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
padded_mask[1:-1, 1:-1] = msk[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > mask_thresh, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_shape[1])
y2 = min(ref_box[3] + 1, im_shape[0])
masks[y1: y2, x1: x2, i] = mask[
(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])]
i += 1
return masks
def vis_mask(img, mask, col, alpha=0.4, show_border=True, border_thick=1):
"""Visualizes a single binary mask."""
img = img.astype(np.float32)
idx = np.nonzero(mask)
img[idx[0], idx[1], :] *= 1.0 - alpha
img[idx[0], idx[1], :] += alpha * col
if show_border:
_, contours, _ = cv2.findContours(
mask.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
cv2.drawContours(img, contours, -1, _WHITE, border_thick, cv2.LINE_AA)
return img.astype(np.uint8)
def vis_class(img, pos, class_str, font_scale=0.35):
"""Visualizes the class."""
x0, y0 = int(pos[0]), int(pos[1])
# Compute text size.
txt = class_str
font = cv2.FONT_HERSHEY_SIMPLEX
((txt_w, txt_h), _) = cv2.getTextSize(txt, font, font_scale, 1)
# Place text background.
back_tl = x0, y0 - int(1.3 * txt_h)
back_br = x0 + txt_w, y0
cv2.rectangle(img, back_tl, back_br, _GREEN, -1)
# Show text.
txt_tl = x0, y0 - int(0.3 * txt_h)
cv2.putText(img, txt, txt_tl, font, font_scale, _GRAY, lineType=cv2.LINE_AA)
return img
def vis_bbox(img, bbox, thick=1):
"""Visualizes a bounding box."""
(x0, y0, w, h) = bbox
x1, y1 = int(x0 + w), int(y0 + h)
x0, y0 = int(x0), int(y0)
cv2.rectangle(img, (x0, y0), (x1, y1), _GREEN, thickness=thick)
return img
def vis_keypoints(img, kps, kp_thresh=2, alpha=0.7):
"""Visualizes keypoints (adapted from vis_one_image).
kps has shape (4, #keypoints) where 4 rows are (x, y, logit, prob).
"""
    # NOTE: relies on a keypoint utilities module (as in Detectron's
    # detectron/utils/keypoints.py); `keypoint_utils` is not imported here.
    dataset_keypoints, _ = keypoint_utils.get_keypoints()
kp_lines = kp_connections(dataset_keypoints)
# Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv.
cmap = plt.get_cmap('rainbow')
colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors]
# Perform the drawing on a copy of the image, to allow for blending.
kp_mask = np.copy(img)
# Draw mid shoulder / mid hip first for better visualization.
mid_shoulder = (
kps[:2, dataset_keypoints.index('right_shoulder')] +
kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
sc_mid_shoulder = np.minimum(
kps[2, dataset_keypoints.index('right_shoulder')],
kps[2, dataset_keypoints.index('left_shoulder')])
mid_hip = (
kps[:2, dataset_keypoints.index('right_hip')] +
kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
sc_mid_hip = np.minimum(
kps[2, dataset_keypoints.index('right_hip')],
kps[2, dataset_keypoints.index('left_hip')])
nose_idx = dataset_keypoints.index('nose')
if sc_mid_shoulder > kp_thresh and kps[2, nose_idx] > kp_thresh:
cv2.line(
kp_mask, tuple(mid_shoulder), tuple(kps[:2, nose_idx]),
color=colors[len(kp_lines)], thickness=2, lineType=cv2.LINE_AA)
if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
cv2.line(
kp_mask, tuple(mid_shoulder), tuple(mid_hip),
color=colors[len(kp_lines) + 1], thickness=2, lineType=cv2.LINE_AA)
# Draw the keypoints.
for l in range(len(kp_lines)):
i1 = kp_lines[l][0]
i2 = kp_lines[l][1]
p1 = kps[0, i1], kps[1, i1]
p2 = kps[0, i2], kps[1, i2]
if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
cv2.line(
kp_mask, p1, p2,
color=colors[l], thickness=2, lineType=cv2.LINE_AA)
if kps[2, i1] > kp_thresh:
cv2.circle(
kp_mask, p1,
radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)
if kps[2, i2] > kp_thresh:
cv2.circle(
kp_mask, p2,
radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)
# Blend the keypoints.
return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0)
def vis_one_image_opencv(
im, class_names,
boxes, segms=None, keypoints=None,
thresh=0.9, kp_thresh=2,
show_box=False, show_class=False):
"""Constructs a numpy array with the detections visualized."""
boxes, segms, keypoints, classes = \
convert_from_cls_format_v2(boxes, segms, keypoints, class_names)
    if (boxes is None or boxes.shape[0] == 0 or
            max(boxes[:, 4]) < thresh):
        return im
mask_color_id, masks, color_list = 0, None, colormap()
if segms is not None and len(segms) > 0:
masks = get_mask(boxes, segms, im.shape[0:2])
# Display in largest to smallest order to reduce occlusion
areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
sorted_inds = np.argsort(-areas)
for i in sorted_inds:
bbox = boxes[i, :4]
score = boxes[i, -1]
if score < thresh:
continue
# show box (off by default)
if show_box:
im = vis_bbox(
im, (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]))
# show class (off by default)
if show_class:
class_str = get_class_string(class_names[classes[i]], score)
im = vis_class(im, (bbox[0], bbox[1] - 2), class_str)
# show mask
if segms is not None and len(segms) > i:
color_mask = color_list[mask_color_id % len(color_list), 0:3]
mask_color_id += 1
im = vis_mask(im, masks[..., i], color_mask)
# show keypoints
if keypoints is not None and len(keypoints) > i:
im = vis_keypoints(im, keypoints[i], kp_thresh)
cv2.imshow('Detectron', im)
cv2.waitKey(0)
def vis_one_image(
im, class_names,
boxes, segms=None, keypoints=None,
thresh=0.9, kp_thresh=2, dpi=100,
box_alpha=0.0, show_class=True,
filename=None):
"""Visual debugging of detections."""
boxes, segms, keypoints, classes = \
convert_from_cls_format_v2(boxes, segms, keypoints, class_names)
    if (boxes is None or boxes.shape[0] == 0 or
            max(boxes[:, 4]) < thresh):
        return
im, mask = im[:, :, ::-1], None
#dataset_keypoints, _ = keypoint_utils.get_keypoints()
if segms is not None and len(segms) > 0:
masks = get_mask(boxes, segms, im.shape[0:2])
color_list = colormap(rgb=True) / 255
# kp_lines = kp_connections(dataset_keypoints)
# cmap = plt.get_cmap('rainbow')
# colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
fig = plt.figure(frameon=False)
fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
ax = plt.Axes(fig, [0., 0., 1., 1.])
ax.axis('off')
fig.add_axes(ax)
ax.imshow(im)
# Display in largest to smallest order to reduce occlusion
areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
sorted_inds = np.argsort(-areas)
mask_color_id = 0
for i in sorted_inds:
bbox = boxes[i, :4]
score = boxes[i, -1]
if score < thresh:
continue
# show box (off by default)
ax.add_patch(
plt.Rectangle((bbox[0], bbox[1]),
bbox[2] - bbox[0],
bbox[3] - bbox[1],
fill=False, edgecolor='g',
linewidth=1.0, alpha=box_alpha))
if show_class:
ax.text(
bbox[0], bbox[1] - 2,
get_class_string(class_names[classes[i]], score),
fontsize=11,
family='serif',
bbox=dict(
facecolor='g', alpha=0.4, pad=0, edgecolor='none'),
color='white')
# show mask
if segms is not None and len(segms) > i:
img = np.ones(im.shape)
color_mask = color_list[mask_color_id % len(color_list), 0:3]
mask_color_id += 1
w_ratio = .4
for c in range(3):
color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio
for c in range(3):
img[:, :, c] = color_mask[c]
e = masks[:, :, i]
_, contour, hier = cv2.findContours(
e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
for c in contour:
polygon = Polygon(
c.reshape((-1, 2)),
fill=True, facecolor=color_mask,
edgecolor='w', linewidth=1.2,
alpha=0.5)
ax.add_patch(polygon)
# show keypoints
if keypoints is not None and len(keypoints) > i:
kps = keypoints[i]
# plt.autoscale(False)
# for l in range(len(kp_lines)):
# i1 = kp_lines[l][0]
# i2 = kp_lines[l][1]
# if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
# x = [kps[0, i1], kps[0, i2]]
# y = [kps[1, i1], kps[1, i2]]
# line = plt.plot(x, y)
# plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7)
# if kps[2, i1] > kp_thresh:
# plt.plot(
# kps[0, i1], kps[1, i1], '.', color=colors[l],
# markersize=3.0, alpha=0.7)
#
# if kps[2, i2] > kp_thresh:
# plt.plot(
# kps[0, i2], kps[1, i2], '.', color=colors[l],
# markersize=3.0, alpha=0.7)
#
# # add mid shoulder / mid hip for better visualization
# mid_shoulder = (
# kps[:2, dataset_keypoints.index('right_shoulder')] +
# kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
# sc_mid_shoulder = np.minimum(
# kps[2, dataset_keypoints.index('right_shoulder')],
# kps[2, dataset_keypoints.index('left_shoulder')])
# mid_hip = (
# kps[:2, dataset_keypoints.index('right_hip')] +
# kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
# sc_mid_hip = np.minimum(
# kps[2, dataset_keypoints.index('right_hip')],
# kps[2, dataset_keypoints.index('left_hip')])
# if (sc_mid_shoulder > kp_thresh and
# kps[2, dataset_keypoints.index('nose')] > kp_thresh):
# x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]]
# y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]]
# line = plt.plot(x, y)
# plt.setp(
# line, color=colors[len(kp_lines)], linewidth=1.0, alpha=0.7)
# if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
# x = [mid_shoulder[0], mid_hip[0]]
# y = [mid_shoulder[1], mid_hip[1]]
# line = plt.plot(x, y)
# plt.setp(
# line, color=colors[len(kp_lines) + 1], linewidth=1.0,
# alpha=0.7)
if filename is not None:
fig.savefig(filename, dpi=dpi)
plt.close('all')
else:
plt.imshow(im)
plt.show()
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
sys.path.insert(0, '..')
import argparse
import pprint
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.modeling.detector import Detector
import lib.utils.logger as logger
def parse_args():
"""Parse input arguments"""
parser = argparse.ArgumentParser(description='Export a Detection Network')
parser.add_argument('--cfg', dest='cfg_file',
help='optional config file', default=None, type=str)
parser.add_argument('--exp_dir', dest='exp_dir',
help='experiment dir',
default=None, type=str)
parser.add_argument('--input_shape', dest='input_shape',
help='The shape of dummy input',
default=(1, 224, 224, 3), type=tuple)
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
return args
if __name__ == '__main__':
args = parse_args()
if args.exp_dir is None or \
not os.path.exists(args.exp_dir):
        raise ValueError('Expected an existing experiment dir.\nGot {}'.format(
            os.path.abspath(args.exp_dir) if args.exp_dir else 'None'))
logger.info('Called with args:')
logger.info(args)
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir)
logger.info('Using config:\n' + pprint.pformat(cfg))
# Load the checkpoint and test engine
checkpoint = coordinator.checkpoint(global_step=None, wait=True)
# Ready to export the network
    logger.info('Exported model will be saved to `{:s}`'
.format(coordinator.exports_dir()))
detector = Detector().eval().cuda(cfg.GPU_ID)
detector.load_weights(checkpoint)
detector.optimize_for_inference()
    # Mixed precision inference?
if cfg.MODEL.DATA_TYPE.lower() == 'float16':
detector.half() # Powerful FP16 Support
data = torch.zeros(*args.input_shape).byte()
ims_info = torch.zeros(args.input_shape[0], 3).float()
torch.onnx.export(
model=detector,
args={'data': data, 'ims_info': ims_info},
f=checkpoint.replace(
'checkpoints', 'exports')
.replace('pth', 'onnx'),
verbose=True,
)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import os
import sys
sys.path.insert(0, '..')
import argparse
import numpy as np
import dragon
import dragon.core.mpi as mpi
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.core.train import train_net
from lib.datasets.factory import get_imdb
import lib.utils.logger as logger
def parse_args():
"""Parse input arguments."""
parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
parser.add_argument('--cfg', dest='cfg_file',
help='config file',
default=None, type=str)
parser.add_argument('--exp_dir', dest='exp_dir',
help='experiment dir',
default=None, type=str)
parser.add_argument('--resume', dest='resume',
help='resume training?',
action='store_true')
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
return args
if __name__ == '__main__':
args = parse_args()
if args.exp_dir is None or \
not os.path.exists(args.exp_dir):
        raise ValueError('Expected an existing experiment dir.\nGot {}'.format(
            os.path.abspath(args.exp_dir) if args.exp_dir else 'None'))
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir)
start_iter = 0
if args.resume:
cfg.TRAIN.WEIGHTS, start_iter = \
coordinator.checkpoint(global_step=None)
# Setup MPI
if cfg.NUM_GPUS != mpi.Size():
        raise ValueError('Expected {} mpi nodes, but got {}.'
            .format(cfg.NUM_GPUS, mpi.Size()))
    GPUs = [i for i in range(cfg.NUM_GPUS)]
    cfg.GPU_ID = GPUs[mpi.Rank()]
    mpi.Parallel(GPUs)
mpi.SetParallelMode('NCCL' if cfg.USE_NCCL else 'MPI')
# Setup logger
if mpi.Rank() != 0:
logger.set_root_logger(False)
# Fix the random seeds (numpy and dragon) for reproducibility
np.random.seed(cfg.RNG_SEED)
dragon.SetRandomSeed(cfg.RNG_SEED)
# Inspect the database
database = get_imdb(cfg.TRAIN.DATABASE)
logger.info('Database({}): {} images will be used to train.'
.format(cfg.TRAIN.DATABASE, database.num_images))
# Ready to train the network
logger.info('Output will be saved to `{:s}`'
.format(coordinator.checkpoints_dir()))
train_net(coordinator)
# Finalize mpi
mpi.Finalize()
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
sys.path.insert(0, '..')
import argparse
import pprint
import importlib
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.core.test import TestServer
from lib.modeling.detector import Detector
from lib.datasets.factory import get_imdb
from lib.utils import logger
def parse_args():
"""Parse input arguments"""
parser = argparse.ArgumentParser(description='Test a Detection Network')
parser.add_argument('--cfg', dest='cfg_file',
help='optional config file', default=None, type=str)
parser.add_argument('--exp_dir', dest='exp_dir',
help='experiment dir',
default=None, type=str)
parser.add_argument('--iter', dest='iter', help='global step',
default=0, type=int)
parser.add_argument('--wait', dest='wait',
help='wait the checkpoint?',
action='store_true')
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
return args
if __name__ == '__main__':
args = parse_args()
logger.info('Called with args:\n' + str(args))
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir)
logger.info('Using config:\n' + pprint.pformat(cfg))
# Load the checkpoint and test engine
checkpoint = coordinator.checkpoint(global_step=args.iter, wait=args.wait)
if checkpoint is None:
raise RuntimeError('The checkpoint of global step {} does not exist.'.format(args.iter))
test_engine = importlib.import_module('lib.{}.test'.format(cfg.MODEL.TYPE))
# Inspect the database
database = get_imdb(cfg.TEST.DATABASE)
logger.info('Database({}): {} images will be used to test.'
.format(cfg.TEST.DATABASE, database.num_images))
# Ready to test the network
logger.info('Results will be saved to `{:s}`'
.format(coordinator.results_dir(checkpoint)))
detector = Detector().eval().cuda(cfg.GPU_ID)
detector.load_weights(checkpoint)
detector.optimize_for_inference()
    # Mixed precision inference?
if cfg.MODEL.DATA_TYPE.lower() == 'float16':
detector.half() # Powerful FP16 Support
server = TestServer(coordinator.results_dir(checkpoint))
test_engine.test_net(detector, server)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
sys.path.insert(0, '..')
import argparse
import numpy as np
from lib.core.coordinator import Coordinator
from lib.utils import logger
def parse_args():
"""Parse input arguments"""
parser = argparse.ArgumentParser(description='Test a Detection Network')
parser.add_argument('--cfg', dest='cfg_file',
help='optional config file', default=None, type=str)
parser.add_argument('--exp_dir', dest='exp_dir',
help='experiment dir',
default=None, type=str)
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
return args
def test(cfg_file, exp_dir, global_step):
"""Call test.py to test models on specific global step.
Parameters
----------
cfg_file : str
The path of the cfg file.
global_step : int
The iteration to test.
"""
import subprocess
args = '--cfg {} --exp_dir {} --iter {}'.format(
os.path.abspath(cfg_file), exp_dir, global_step)
return subprocess.call('{} {} {}'.format(
sys.executable, 'test.py', args), shell=True)
if __name__ == '__main__':
args = parse_args()
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir)
global_steps = []
files = os.listdir(coordinator.checkpoints_dir())
for ix, file in enumerate(files):
step = int(file.split('_iter_')[-1].split('.')[0])
global_steps.append(step)
order = np.argsort(-np.array(global_steps))
for test_idx in order:
logger.info('Testing net at global step: {}......'.format(global_steps[test_idx]))
logger.info(' - Using model file: {}'.format(files[test_idx]))
test(args.cfg_file, args.exp_dir, global_steps[test_idx])
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
sys.path.insert(0, '..')
import os.path as osp
import argparse
import pprint
import dragon
import numpy as np
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.core.train import train_net
from lib.datasets.factory import get_imdb
import lib.utils.logger as logger
def parse_args():
"""Parse input arguments."""
parser = argparse.ArgumentParser(description='Train a Detection Network')
parser.add_argument('--cfg', dest='cfg_file',
help='optional config file',
default=None, type=str)
parser.add_argument('--exp_dir', dest='exp_dir',
help='experiment dir',
default=None, type=str)
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
return args
def mpi_train(cfg_file, exp_dir):
"""Call mpi to train models on multiple GPUs.
Parameters
----------
cfg_file : str
The path of the cfg file.
exp_dir : str
The existing experiment dir.
"""
import subprocess
args = '--cfg {} --exp_dir {}'.format(osp.abspath(cfg_file), exp_dir)
mpi_args = 'mpirun --allow-run-as-root -n {}'.format(cfg.NUM_GPUS)
if len(cfg.HOSTS) > 0:
mpi_args += ' -x NCCL_DEBUG=INFO' \
' -x NCCL_IB_CUDA_SUPPORT=1' \
' -mca btl_openib_allow_ib 1' \
' -mca mpi_warn_on_fork 0 -H '
for i, host in enumerate(cfg.HOSTS):
mpi_args += (host + ':{},'.format(cfg.NUM_GPUS // len(cfg.HOSTS)))
if i > 0: subprocess.call('scp -r {} {}:{}'.format(
osp.abspath(exp_dir), host, osp.abspath(exp_dir)), shell=True)
return subprocess.call('{} {} {} {}'.format(
mpi_args, sys.executable, 'mpi_train.py', args), shell=True)
if __name__ == '__main__':
args = parse_args()
logger.info('Called with args:\n' + str(args))
coordinator = Coordinator(args.cfg_file, args.exp_dir)
logger.info('Using config:\n' + pprint.pformat(cfg))
if cfg.NUM_GPUS > 1:
# Dispatch the MPI to start a multi-nodes task
coordinator.checkpoints_dir()
mpi_train(args.cfg_file, coordinator.experiment_dir)
else:
# Fix the random seeds (numpy and dragon) for reproducibility
np.random.seed(cfg.RNG_SEED)
dragon.SetRandomSeed(cfg.RNG_SEED)
# Inspect the database
database = get_imdb(cfg.TRAIN.DATABASE)
logger.info('Database({}): {} images will be used to train.'
.format(cfg.TRAIN.DATABASE, database.num_images))
# Ready to train the network
logger.info('Output will be saved to `{:s}`'
.format(coordinator.checkpoints_dir()))
train_net(coordinator)
\ No newline at end of file