Commit f4ecc7c7 by Ting PAN

Change the code structure

1 parent d3ed62db
Showing with 4844 additions and 3750 deletions
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.4.0 (20200408)
Dragon Minimum Required (Version 0.3.0.dev20200408)
Changes:
Preview Features:
- Optimize the code structure.
- DALI support for SSD, RetinaNet, and Faster-RCNN.
- Use KPLRecord instead of SeetaRecord.
Bugs fixed:
- Fix the frozen Affine issue.
------------------------------------------------------------------------
SeetaDet 0.3.0 (20191121)
Dragon Minimum Required (Version 0.3.0.dev20191121)
......
@@ -2,8 +2,8 @@
## WHAT's SeetaDet?
SeetaDet is a platform implementing popular object detection algorithms,
including R-CNN series, SSD, and RetinaNet.
We have achieved the same or higher performance than the baselines reported in the original papers.
@@ -14,22 +14,33 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de
## Requirements
seeta-dragon >= 0.3.0.dev20200408
## Installation
#### Build From Source
If you prefer to develop modules as well as run experiments,
the following command builds the package without installing it into ***site-packages***:
```bash
cd SeetaDet && python setup.py build
```
#### Install From Source
Clone this repository to local disk and install:
```bash
cd SeetaDet && python setup.py install
```
#### Install From Git
You can also install it from the remote repository:
```bash
pip install git+https://gitlab.seetatech.com/seetaresearch/SeetaDet.git@master
```
## Quick Start
@@ -37,7 +48,7 @@ bash ./make.sh
#### Train a detection model
```bash
cd tools
python train.py --cfg <MODEL_YAML>
```
@@ -46,20 +57,20 @@ We have provided the default YAML examples into ``SeetaDet/configs``.
#### Test a detection model
```bash
cd tools
python test.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR> --iter <ITERATION>
```
Or
```bash
cd tools
python test_all.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR>
```
#### Export a detection model to ONNX
```bash
cd tools
python export.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR> --iter <ITERATION>
```
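The exported file can be sanity-checked with any ONNX runtime. A minimal sketch, assuming the `onnxruntime` package and an exported file named `detector.onnx` (both are placeholders, not part of this repository):
```python
import numpy as np
import onnxruntime as ort  # assumed installed; not a SeetaDet dependency

sess = ort.InferenceSession('detector.onnx')  # hypothetical export path
feed = {sess.get_inputs()[0].name:
        np.zeros((1, 3, 800, 1333), dtype='float32')}  # dummy image batch
outputs = sess.run(None, feed)
print([o.shape for o in outputs])  # inspect the exported output tensors
```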
......
PROJECT(gpu_nms)
CMAKE_MINIMUM_REQUIRED(VERSION 3.0.2)
# ---------------- User Config ----------------
# Set your python "interpreter" if necessary
# if not, a default interpreter will be used
# here, provide several examples:
# set(PYTHON_EXECUTABLE /usr/bin/python) # Linux & OSX, Builtin Python
# set(PYTHON_EXECUTABLE /X/anaconda/bin/python) # Linux & OSX, Anaconda
# set(PYTHON_EXECUTABLE X:/Anaconda/python) # Win, Anaconda
# Set CUDA compiling architecture
# Remove "compute_70/sm_70" if using CUDA 8.0
set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
-gencode arch=compute_35,code=sm_35
-gencode arch=compute_50,code=sm_50
-gencode arch=compute_60,code=sm_60
-gencode arch=compute_70,code=sm_70)
# ---------------- User Config ----------------
# ---[ Dependencies
include(${PROJECT_SOURCE_DIR}/cmake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/cmake/FindNumPy.cmake)
FIND_PACKAGE(CUDA REQUIRED)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
message(STATUS "C++11 support has been enabled by default.")
# ---[ Config types
set(CMAKE_BUILD_TYPE Release CACHE STRING "set build type to release")
set(CMAKE_CONFIGURATION_TYPES Release CACHE STRING "set build type to release" FORCE)
# ---[ Includes
set(INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
include_directories(${INCLUDE_DIR})
include_directories(${PROJECT_SOURCE_DIR}/src)
include_directories(${PYTHON_INCLUDE_DIRS})
include_directories(${NUMPY_INCLUDE_DIR})
include_directories(${CUDA_INCLUDE_DIRS})
# ---[ libs
link_directories(${PYTHON_LIBRARIES})
# ---[ Install
set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR} CACHE STRING "set install prefix" FORCE)
set(CMAKE_SHARED_LIBRARY_PREFIX "")
# ---[ Flags
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_ARCH}")
if(WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP /O2 /Oi /GL /Ot /Gy")
endif()
if(UNIX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -s -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s -w -fPIC -O3 -m64 -std=c++11")
endif()
# ---[ Files
set(HEADER_FILES gpu_nms.h)
set(SRC_FILES gpu_nms.cpp nms_kernel.cu)
# ---[ Add Target
CUDA_ADD_LIBRARY(${PROJECT_NAME} SHARED ${HEADER_FILES} ${SRC_FILES})
# ---[ Link Libs
TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${CUDA_LIBRARIES} ${CUDA_cublas_LIBRARY} ${CUDA_curand_LIBRARY})
if(WIN32)
TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${PYTHON_LIBRARIES})
endif()
# ---[ Install Target
set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME "gpu_nms")
install (TARGETS ${PROJECT_NAME} DESTINATION ${PROJECT_BINARY_DIR}/../install/lib/nms)
# - Find the NumPy libraries
# This module finds if NumPy is installed, and sets the following variables
# indicating where it is.
#
# TODO: Update to provide the libraries and paths for linking npymath lib.
#
# NUMPY_FOUND - was NumPy found
# NUMPY_VERSION - the version of NumPy found as a string
# NUMPY_VERSION_MAJOR - the major version number of NumPy
# NUMPY_VERSION_MINOR - the minor version number of NumPy
# NUMPY_VERSION_PATCH - the patch version number of NumPy
# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601
# NUMPY_INCLUDE_DIR - path to the NumPy include files
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
if(PYTHONINTERP_FOUND)
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import numpy as n; print(n.__version__); print(n.get_include());"
RESULT_VARIABLE __result
OUTPUT_VARIABLE __output
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(__result MATCHES 0)
string(REGEX REPLACE ";" "\\\\;" __values ${__output})
string(REGEX REPLACE "\r?\n" ";" __values ${__values})
list(GET __values 0 NUMPY_VERSION)
list(GET __values 1 NUMPY_INCLUDE_DIR)
string(REGEX MATCH "^([0-9])+\\.([0-9])+\\.([0-9])+" __ver_check "${NUMPY_VERSION}")
if(NOT "${__ver_check}" STREQUAL "")
set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1})
set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2})
set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3})
math(EXPR NUMPY_VERSION_DECIMAL
"(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR})
else()
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n")
endif()
endif()
else()
message("Can not find Python interpretator.")
message(FATAL_ERROR "Do you set PYTHON_EXECUTABLE correctly?")
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION
VERSION_VAR NUMPY_VERSION)
if(NUMPY_FOUND)
message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})")
endif()
\ No newline at end of file
# - Find python libraries
# This module finds the libraries corresponding to the Python interpreter
# FindPythonInterp provides.
# This code sets the following variables:
#
# PYTHONLIBS_FOUND - have the Python libs been found
# PYTHON_PREFIX - path to the Python installation
# PYTHON_LIBRARIES - path to the python library
# PYTHON_INCLUDE_DIRS - path to where Python.h is found
# PYTHON_MODULE_EXTENSION - lib extension, e.g. '.so' or '.pyd'
# PYTHON_MODULE_PREFIX - lib name prefix: usually an empty string
# PYTHON_SITE_PACKAGES - path to installation site-packages
# PYTHON_IS_DEBUG - whether the Python interpreter is a debug build
#
# Thanks to talljimbo for the patch adding the 'LDVERSION' config
# variable usage.
#=============================================================================
# Copyright 2001-2009 Kitware, Inc.
# Copyright 2012 Continuum Analytics, Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# * Neither the names of Kitware, Inc., the Insight Software Consortium,
# nor the names of their contributors may be used to endorse or promote
# products derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=============================================================================
# Checking for the extension makes sure that `LibsNew` was found and not just `Libs`.
if(PYTHONLIBS_FOUND AND PYTHON_MODULE_EXTENSION)
return()
endif()
# Use the Python interpreter to find the libs.
if(PythonLibsNew_FIND_REQUIRED)
find_package(PythonInterp ${PythonLibsNew_FIND_VERSION} REQUIRED)
else()
find_package(PythonInterp ${PythonLibsNew_FIND_VERSION})
endif()
if(NOT PYTHONINTERP_FOUND)
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter
# testing whether sys has the gettotalrefcount function is a reliable, cross-platform
# way to detect a CPython debug interpreter.
#
# The library suffix is from the config var LDVERSION sometimes, otherwise
# VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows.
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"from distutils import sysconfig as s;import sys;import struct;
print('.'.join(str(v) for v in sys.version_info));
print(sys.prefix);
print(s.get_python_inc(plat_specific=True));
print(s.get_python_lib(plat_specific=True));
print(s.get_config_var('SO'));
print(hasattr(sys, 'gettotalrefcount')+0);
print(struct.calcsize('@P'));
print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
print(s.get_config_var('LIBDIR') or '');
print(s.get_config_var('MULTIARCH') or '');
"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE _PYTHON_VALUES
ERROR_VARIABLE _PYTHON_ERROR_VALUE)
if(NOT _PYTHON_SUCCESS MATCHES 0)
if(PythonLibsNew_FIND_REQUIRED)
message(FATAL_ERROR
"Python config failure:\n${_PYTHON_ERROR_VALUE}")
endif()
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# Convert the process output into a list
string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST)
list(GET _PYTHON_VALUES 1 PYTHON_PREFIX)
list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR)
list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES)
list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG)
list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
list(GET _PYTHON_VALUES 8 PYTHON_LIBDIR)
list(GET _PYTHON_VALUES 9 PYTHON_MULTIARCH)
# Make sure the Python has the same pointer-size as the chosen compiler
# Skip if CMAKE_SIZEOF_VOID_P is not defined
if(CMAKE_SIZEOF_VOID_P AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}"))
if(PythonLibsNew_FIND_REQUIRED)
math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8")
math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8")
message(FATAL_ERROR
"Python config failure: Python is ${_PYTHON_BITS}-bit, "
"chosen compiler is ${_CMAKE_BITS}-bit")
endif()
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# The built-in FindPython didn't always give the version numbers
string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST ${_PYTHON_VERSION_LIST})
list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR)
list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR)
list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH)
# Make sure all directory separators are '/'
string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDE_DIR})
string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES})
if(CMAKE_HOST_WIN32)
set(PYTHON_LIBRARY
"${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
# when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the
# original python installation. They may be found relative to PYTHON_INCLUDE_DIR.
if(NOT EXISTS "${PYTHON_LIBRARY}")
get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY)
set(PYTHON_LIBRARY
"${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
endif()
# raise an error if the python libs are still not found.
if(NOT EXISTS "${PYTHON_LIBRARY}")
message(FATAL_ERROR "Python libraries not found")
endif()
else()
if(PYTHON_MULTIARCH)
set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}/${PYTHON_MULTIARCH}" "${PYTHON_LIBDIR}")
else()
set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}")
endif()
#message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}")
# Probably this needs to be more involved. It would be nice if the config
# information the python interpreter itself gave us were more complete.
find_library(PYTHON_LIBRARY
NAMES "python${PYTHON_LIBRARY_SUFFIX}"
PATHS ${_PYTHON_LIBS_SEARCH}
NO_DEFAULT_PATH)
# If all else fails, just set the name/version and let the linker figure out the path.
if(NOT PYTHON_LIBRARY)
set(PYTHON_LIBRARY python${PYTHON_LIBRARY_SUFFIX})
endif()
endif()
MARK_AS_ADVANCED(
PYTHON_LIBRARY
PYTHON_INCLUDE_DIR
)
# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the
# cache entries because they are meant to specify the location of a single
# library. We now set the variables listed by the documentation for this
# module.
SET(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
SET(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}")
find_package_message(PYTHON
"Found PythonLibs: ${PYTHON_LIBRARY}"
"${PYTHON_EXECUTABLE}${PYTHON_VERSION}")
set(PYTHONLIBS_FOUND TRUE)
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
int boxes_dim, float nms_overlap_thresh, int device_id);
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import numpy as np
cimport numpy as np
assert sizeof(int) == sizeof(np.int32_t)
cdef extern from "gpu_nms.h":
void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh, int device_id=0):
cdef int boxes_num = dets.shape[0]
cdef int boxes_dim = dets.shape[1]
cdef int num_out
cdef np.ndarray[np.int32_t, ndim=1] \
keep = np.zeros(boxes_num, dtype=np.int32)
cdef np.ndarray[np.float32_t, ndim=1] \
scores = dets[:, 4]
cdef np.ndarray[np.intp_t, ndim=1] \
order = scores.argsort()[::-1]
cdef np.ndarray[np.float32_t, ndim=2] \
sorted_dets = dets[order, :]
_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
keep = keep[:num_out]
return list(order[keep])
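For reference, a minimal call of the compiled extension. The exact module path depends on the install layout produced above, so the import below is an assumption:
```python
import numpy as np
from gpu_nms import gpu_nms  # hypothetical import path; depends on your build layout

# Each row is (x1, y1, x2, y2, score), float32 as the wrapper requires.
dets = np.array([
    [10, 10, 60, 60, 0.9],
    [12, 12, 62, 62, 0.8],      # heavily overlaps the first box
    [100, 100, 160, 160, 0.7],  # disjoint from both
], dtype=np.float32)

keep = gpu_nms(dets, 0.5, 0)  # IoU threshold 0.5 on GPU 0
print(keep)  # expected: [0, 2]; box 1 is suppressed by box 0
```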
#!/bin/sh
# Delete cache
rm -rf build install *.c *.cpp  # -f: ignore files missing on a clean checkout
# Compile cpp modules
python setup.py build_ext --inplace
# Compile cuda modules
cd build && cmake .. && make install && cd ..
# Copy to the library root
cp -r install/lib ../
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
#include "gpu_nms.h"
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      /* Report and abort instead of silently swallowing the failure */ \
      fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(error)); \
      abort(); \
    } \
  } while (0)
void SetDevice(int device_id) {
int current_device;
CUDA_CHECK(cudaGetDevice(&current_device));
if (current_device == device_id) return;
CUDA_CHECK(cudaSetDevice(device_id));
}
#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define NMS_BLOCK_SIZE 64
template <typename T>
__device__ T iou(const T* A, const T* B) {
const T x1 = max(A[0], B[0]);
const T y1 = max(A[1], B[1]);
const T x2 = min(A[2], B[2]);
const T y2 = min(A[3], B[3]);
const T width = max((T)0, x2 - x1 + 1);
const T height = max((T)0, y2 - y1 + 1);
const T area = width * height;
const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1);
const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1);
return area / (A_area + B_area - area);
}
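For reference, the same overlap test in plain Python; note the `+ 1` terms, which follow the legacy integer-pixel box convention used throughout this file:
```python
def iou(a, b):
    """IoU of two (x1, y1, x2, y2) boxes, legacy +1 pixel convention."""
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
    area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1)
    area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1)
    return inter / (area_a + area_b - inter)

print(iou([0, 0, 9, 9], [5, 5, 14, 14]))  # 25 / 175 ≈ 0.143
```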
template <typename T>
__global__ void nms_mask(const int num_boxes, const T nms_thresh,
const T* boxes, unsigned long long* mask) {
const int i_start = blockIdx.x * NMS_BLOCK_SIZE;
const int di_end = min(num_boxes - i_start, NMS_BLOCK_SIZE);
const int j_start = blockIdx.y * NMS_BLOCK_SIZE;
const int dj_end = min(num_boxes - j_start, NMS_BLOCK_SIZE);
const int num_blocks = DIV_UP(num_boxes, NMS_BLOCK_SIZE);
const int bid = blockIdx.x;
const int tid = threadIdx.x;
__shared__ T boxes_i[NMS_BLOCK_SIZE * 4];
if (tid < di_end) {
boxes_i[tid * 4 + 0] = boxes[(i_start + tid) * 5 + 0];
boxes_i[tid * 4 + 1] = boxes[(i_start + tid) * 5 + 1];
boxes_i[tid * 4 + 2] = boxes[(i_start + tid) * 5 + 2];
boxes_i[tid * 4 + 3] = boxes[(i_start + tid) * 5 + 3];
}
__syncthreads();
if (tid < dj_end) {
const T* const box_j = boxes + (j_start + tid) * 5;
unsigned long long mask_j = 0;
const int di_start = (i_start == j_start) ? (tid + 1) : 0;
for (int di = di_start; di < di_end; ++di)
if (iou(box_j, boxes_i + di * 4) > nms_thresh)
mask_j |= 1ULL << di;
mask[(j_start + tid) * num_blocks + bid] = mask_j;
}
}
template <typename T>
void ApplyNMS(const int num_boxes, const int max_keeps, const float thresh,
const T* boxes, int* keep_indices, int& num_keep) {
const int num_blocks = DIV_UP(num_boxes, NMS_BLOCK_SIZE);
const dim3 blocks(num_blocks, num_blocks);
size_t mask_nbytes = num_boxes * num_blocks * sizeof(unsigned long long);
size_t boxes_nbytes = num_boxes * 5 * sizeof(T);
void* boxes_dev, *mask_dev;
CUDA_CHECK(cudaMalloc(&boxes_dev, boxes_nbytes));
CUDA_CHECK(cudaMalloc(&mask_dev, mask_nbytes));
CUDA_CHECK(cudaMemcpy(boxes_dev, boxes, boxes_nbytes, cudaMemcpyHostToDevice));
nms_mask<T><<<blocks, NMS_BLOCK_SIZE>>>(num_boxes, thresh,
(T*)boxes_dev,
(unsigned long long*)mask_dev);
CUDA_CHECK(cudaPeekAtLastError());
std::vector<unsigned long long> mask_host(num_boxes * num_blocks);
CUDA_CHECK(cudaMemcpy(&mask_host[0], mask_dev, mask_nbytes, cudaMemcpyDeviceToHost));
std::vector<unsigned long long> dead_bit(num_blocks);
memset(&dead_bit[0], 0, sizeof(unsigned long long) * num_blocks);
int num_selected = 0;
for (int i = 0; i < num_boxes; ++i) {
const int nblock = i / NMS_BLOCK_SIZE;
const int inblock = i % NMS_BLOCK_SIZE;
if (!(dead_bit[nblock] & (1ULL << inblock))) {
keep_indices[num_selected++] = i;
unsigned long long* mask_i = &mask_host[0] + i * num_blocks;
for (int j = nblock; j < num_blocks; ++j) dead_bit[j] |= mask_i[j];
if (num_selected == max_keeps) break;
}
}
num_keep = num_selected;
CUDA_CHECK(cudaFree(mask_dev));
CUDA_CHECK(cudaFree(boxes_dev));
}
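The host-side scan above is the usual greedy NMS over a precomputed suppression bitmask: walk boxes in score order and keep each one unless an already-kept box suppresses it. A self-contained plain-Python reference of the same selection rule (no bitmask, no GPU):
```python
def nms_reference(dets, thresh):
    """Greedy NMS over (x1, y1, x2, y2, score) rows; returns kept indices."""
    def iou(a, b):
        x1, y1 = max(a[0], b[0]), max(a[1], b[1])
        x2, y2 = min(a[2], b[2]), min(a[3], b[3])
        inter = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
        area = lambda r: (r[2] - r[0] + 1) * (r[3] - r[1] + 1)
        return inter / (area(a) + area(b) - inter)
    order = sorted(range(len(dets)), key=lambda i: -dets[i][4])
    keep = []
    for i in order:  # highest score first, as after SortProposals above
        if all(iou(dets[i], dets[j]) <= thresh for j in keep):
            keep.append(i)
    return keep
```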
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
int boxes_dim, float nms_overlap_thresh, int device_id) {
// set the device to use
SetDevice(device_id);
// apply gpu nms
ApplyNMS<float>(boxes_num, boxes_num, nms_overlap_thresh,
boxes_host, keep_out, *num_out);
}
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from distutils.extension import Extension
from distutils.core import setup
from Cython.Distutils import build_ext
import numpy as np
numpy_include = np.get_include()
ext_modules = [
Extension(
"install.lib.utils.cython_bbox",
["bbox.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
include_dirs = [numpy_include]),
Extension(
"install.lib.nms.cpu_nms",
["cpu_nms.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
include_dirs = [numpy_include]),
Extension(
"install.deprecated.gpu_nms",
["gpu_nms.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
language='c++',
include_dirs = [numpy_include]),
Extension(
'install.lib.pycocotools._mask',
['../lib/pycocotools/maskApi.c', '../lib/pycocotools/_mask.pyx'],
include_dirs=[numpy_include, 'pycocotools'],
extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99']),
]
setup(name='Detectron', ext_modules=ext_modules, cmdclass={'build_ext': build_ext})
NUM_GPUS: 8
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: faster_rcnn
  BACKBONE: resnet101.fpn
  CLASSES: ['__background__',
            'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', 'stop sign', 'parking meter', 'bench',
            'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
            'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
            'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
            'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
            'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
            'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
            'teddy bear', 'hair drier', 'toothbrush']
  NUM_CLASSES: 81
SOLVER:
  BASE_LR: 0.02
  DECAY_STEPS: [60000, 80000]
  MAX_STEPS: 90000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: coco_faster_rcnn
FRCNN:
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
TRAIN:
  WEIGHTS: '/model/R-101.Affine.pth'
  DATASET: '/data/coco_2014_trainval35k'
  USE_DIFF: False  # Do not use crowd objects
  IMS_PER_BATCH: 2
  BATCH_SIZE: 512
  SCALES: [800]
  MAX_SIZE: 1333
TEST:
  DATASET: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  RPN_POST_NMS_TOP_N: 1000
  SCALES: [800]
  MAX_SIZE: 1333
  NMS: 0.5
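Configs like the one above are plain YAML, so they can be inspected directly. A minimal sketch using PyYAML; the file name is a placeholder for whichever config under `SeetaDet/configs` you pass to `--cfg`:
```python
import yaml

with open('coco_faster_rcnn.yml') as f:  # hypothetical file name
    cfg = yaml.safe_load(f)

print(cfg['MODEL']['TYPE'])          # 'faster_rcnn'
print(cfg['SOLVER']['BASE_LR'])      # 0.02
print(len(cfg['MODEL']['CLASSES']))  # 81, matches NUM_CLASSES
```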
NUM_GPUS: 8
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: faster_rcnn
  BACKBONE: resnet101.fpn
  CLASSES: ['__background__',
            'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', 'stop sign', 'parking meter', 'bench',
            'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
            'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
            'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
            'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
            'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
            'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
            'teddy bear', 'hair drier', 'toothbrush']
  NUM_CLASSES: 81
SOLVER:
  BASE_LR: 0.02
  DECAY_STEPS: [120000, 160000]
  MAX_STEPS: 180000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: coco_faster_rcnn
FRCNN:
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
TRAIN:
  WEIGHTS: '/model/R-101.Affine.pth'
  DATASET: '/data/coco_2014_trainval35k'
  USE_DIFF: False  # Do not use crowd objects
  IMS_PER_BATCH: 2
  BATCH_SIZE: 512
  SCALES: [800]
  MAX_SIZE: 1333
TEST:
  DATASET: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  RPN_POST_NMS_TOP_N: 1000
  SCALES: [800]
  MAX_SIZE: 1333
  NMS: 0.5
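The two COCO schedules above differ only in step counts (a 1x versus 2x schedule); the global batch and base LR are identical. A quick sanity check of the arithmetic, written out rather than taken from the repository:
```python
num_gpus, ims_per_gpu = 8, 2
global_batch = num_gpus * ims_per_gpu    # 16 images per iteration
steps_1x, steps_2x = 90000, 180000       # the two configs above
print(global_batch * steps_1x)  # 1,440,000 images (~12 passes over ~118k trainval35k)
print(global_batch * steps_2x)  # 2,880,000 images (~24 passes)
```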
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: faster_rcnn
  BACKBONE: resnet50.fpn
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.002
  DECAY_STEPS: [100000, 140000]
  MAX_STEPS: 140000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_faster_rcnn
FRCNN:
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 2
  BATCH_SIZE: 128
  SCALES: [600]
  MAX_SIZE: 1000
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  RPN_POST_NMS_TOP_N: 1000
  SCALES: [600]
  MAX_SIZE: 1000
  NMS: 0.45
\ No newline at end of file
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: faster_rcnn
  BACKBONE: vgg16.c4
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.001
  WEIGHT_DECAY: 0.0005
  DECAY_STEPS: [100000, 140000]
  MAX_STEPS: 140000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_faster_rcnn
RPN:
  STRIDES: [16]
  SCALES: [8, 16, 32]  # RField: [128, 256, 512]
  ASPECT_RATIOS: [0.5, 1.0, 2.0]
FRCNN:
  ROI_XFORM_METHOD: RoIPool
  ROI_XFORM_RESOLUTION: 7
  MLP_HEAD_DIM: 4096
TRAIN:
  WEIGHTS: '/model/VGG16.RCNN.pth'
  DATASET: '/data/voc_0712_trainval'
  RPN_MIN_SIZE: 16
  IMS_PER_BATCH: 2
  BATCH_SIZE: 128
  SCALES: [600]
  MAX_SIZE: 1000
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  RPN_MIN_SIZE: 16
  RPN_POST_NMS_TOP_N: 300
  SCALES: [600]
  MAX_SIZE: 1000
  NMS: 0.45
\ No newline at end of file
NUM_GPUS: 4
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: retinanet
  BACKBONE: resnet50.fpn
  CLASSES: ['__background__',
            'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', 'stop sign', 'parking meter', 'bench',
            'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
            'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
            'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
            'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
            'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
            'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
            'teddy bear', 'hair drier', 'toothbrush']
  NUM_CLASSES: 81
SOLVER:
  BASE_LR: 0.01
  DECAY_STEPS: [60000, 80000]
  MAX_STEPS: 90000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATASET: '/data/coco_2014_trainval35k'
  USE_DIFF: False  # Do not use crowd objects
  USE_COLOR_JITTER: True
  IMS_PER_BATCH: 8
  SCALES: [400]
  MAX_SIZE: 666
  RANDOM_SCALES: [0.75, 1.0]
TEST:
  DATASET: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  IMS_PER_BATCH: 1
  SCALES: [400]
  MAX_SIZE: 666
  NMS: 0.5
\ No newline at end of file
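The new `RANDOM_SCALES` key replaces the older `USE_SCALE_JITTER`/`SCALE_JITTER_RANGE` pair seen in the configs below. A sketch of the presumed semantics, inferred from the rename rather than read from this commit's loader code: a factor drawn from the range rescales the base training scale.
```python
import random

def sample_train_scale(base_scale=400, random_scales=(0.75, 1.0)):
    """Presumed scale jitter: uniform factor in [lo, hi] times the base scale."""
    lo, hi = random_scales
    return int(base_scale * random.uniform(lo, hi))

print([sample_train_scale() for _ in range(4)])  # e.g. [352, 390, 311, 397]
```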
NUM_GPUS: 4
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: retinanet
BACKBONE: resnet50.fpn
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench',
'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush']
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WARM_UP_STEPS: 2000 # default: 500
DECAY_STEPS: [120000, 160000]
MAX_STEPS: 180000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
DROPBLOCK:
DROP_ON: True
DECREMENT: 0.000005 # * 20000 = 0.1
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
USE_SCALE_JITTER: True
USE_COLOR_JITTER: True
SCALE_JITTER_RANGE: [0.75, 1.33]
TEST:
DATABASE: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
SCALES: [400]
MAX_SIZE: 666
NMS: 0.5
\ No newline at end of file
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
TYPE: retinanet
BACKBONE: resnet18.fpn
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.01
DECAY_STEPS: [40000, 50000, 60000]
WARM_UP_STEPS: 2000
MAX_STEPS: 60000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/model/R-18.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_JITTER_RANGE: [0.5, 2.0]
USE_SCALE_JITTER: True
USE_COLOR_JITTER: True
TEST:
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
MAX_SIZE: 500
NMS: 0.45
\ No newline at end of file
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
  TYPE: retinanet
  BACKBONE: airnet.fpn
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.01
  DECAY_STEPS: [40000, 50000, 60000]
  MAX_STEPS: 60000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_retinanet_320
FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
  WEIGHTS: '/model/AirNet.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
  USE_COLOR_JITTER: True
  IMS_PER_BATCH: 32
  SCALES: [320]
  RANDOM_SCALES: [0.5, 1.0]
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [320]
  NMS: 0.45
\ No newline at end of file
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
  TYPE: retinanet
  BACKBONE: resnet34.fpn
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.01
  DECAY_STEPS: [40000, 50000, 60000]
  WARM_UP_STEPS: 2000
  MAX_STEPS: 60000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_retinanet_320
FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
  USE_COLOR_JITTER: True
  IMS_PER_BATCH: 32
  SCALES: [320]
  RANDOM_SCALES: [0.5, 2.0]
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [320]
  NMS: 0.45
\ No newline at end of file
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: ssd
  BACKBONE: airnet5b.mbox
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.001
  DECAY_STEPS: [80000, 100000, 120000]
  MAX_STEPS: 120000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_ssd_320
SSD:
  NUM_CONVS: 2
  MULTIBOX:
    STRIDES: [8, 16, 32]
    MIN_SIZES: [30, 90, 150]
    MAX_SIZES: [90, 150, 210]
    ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
  WEIGHTS: '/model/AirNet.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
  SCALES: [320]
  RANDOM_SCALES: [0.25, 1.00]
  IMS_PER_BATCH: 32
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 8
  SCALES: [320]
  NMS_TOP_K: 400
  NMS: 0.45
  SCORE_THRESH: 0.01
  DETECTIONS_PER_IM: 200
\ No newline at end of file
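In the multibox head above, each stride contributes default boxes whose sizes run between `MIN_SIZES[i]` and `MAX_SIZES[i]`; in the standard SSD formulation the extra square box uses the geometric mean of the two. A sketch of that convention, assumed from the SSD paper rather than read from this repo's prior-box code:
```python
import math

strides = [8, 16, 32]
min_sizes = [30, 90, 150]
max_sizes = [90, 150, 210]

for s, lo, hi in zip(strides, min_sizes, max_sizes):
    extra = math.sqrt(lo * hi)  # square box interpolating the two sizes
    print(f"stride {s}: default sizes {lo} and {extra:.1f}, "
          f"aspect ratios 1, 2, 0.5 around size {lo}")
```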
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: ssd
  BACKBONE: vgg16_reduced_300.mbox
  FREEZE_AT: 0
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.001
  WEIGHT_DECAY: 0.0005
  DECAY_STEPS: [80000, 100000, 120000]
  MAX_STEPS: 120000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_ssd_300
SSD:
  MULTIBOX:
    STRIDES: [8, 16, 32, 64, 100, 300]
    MIN_SIZES: [30, 60, 110, 162, 213, 264]
    MAX_SIZES: [60, 110, 162, 213, 264, 315]
    ASPECT_RATIOS: [
      [1, 2, 0.5],
      [1, 2, 0.5, 3, 0.33],
      [1, 2, 0.5, 3, 0.33],
      [1, 2, 0.5, 3, 0.33],
      [1, 2, 0.5],
      [1, 2, 0.5]
    ]
TRAIN:
  WEIGHTS: '/model/VGG16.SSD.pth'
  DATASET: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
  SCALES: [300]
  RANDOM_SCALES: [0.25, 1.00]
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 8
  SCALES: [300]
  NMS_TOP_K: 400
  NMS: 0.45
  SCORE_THRESH: 0.01
  DETECTIONS_PER_IM: 200
@@ -22,23 +22,29 @@ SOLVER:
  SNAPSHOT_PREFIX: voc_ssd_320
SSD:
  NUM_CONVS: 2
  MULTIBOX:
    STRIDES: [8, 16, 32, 64, 100, 300]
    MIN_SIZES: [30, 60, 110, 162, 213, 264]
    MAX_SIZES: [60, 110, 162, 213, 264, 315]
    ASPECT_RATIOS: [
      [1, 2, 0.5],
      [1, 2, 0.5, 3, 0.33],
      [1, 2, 0.5, 3, 0.33],
      [1, 2, 0.5, 3, 0.33],
      [1, 2, 0.5],
      [1, 2, 0.5]
    ]
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
  SCALES: [320]
  RANDOM_SCALES: [0.25, 1.00]
  IMS_PER_BATCH: 32
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 8
  SCALES: [320]
  NMS_TOP_K: 400
  NMS: 0.45
  SCORE_THRESH: 0.01
......
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "nms_op.h"
namespace dragon {
template <class Context> template <typename T>
void NonMaxSuppressionOp<Context>::DoRunWithType() {
int num_selected;
utils::detection::ApplyNMS(
Output(0)->count(),
Output(0)->count(),
iou_threshold_,
Input(0).template mutable_data<T, Context>(),
Output(0)->template mutable_data<int64_t, CPUContext>(),
num_selected, ctx()
);
Output(0)->Reshape({ num_selected });
}
template <class Context>
void NonMaxSuppressionOp<Context>::RunOnDevice() {
CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5)
<< "\nThe dimensions of boxes should be (num_boxes, 5).";
Output(0)->Reshape({ Input(0).dim(0) });
DispatchHelper<TensorTypes<float>>::Call(this, Input(0));
}
DEPLOY_CPU(NonMaxSuppression);
#ifdef USE_CUDA
DEPLOY_CUDA(NonMaxSuppression);
#endif
OPERATOR_SCHEMA(NonMaxSuppression).NumInputs(1).NumOutputs(1);
NO_GRADIENT(NonMaxSuppression);
} // namespace dragon
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef SEETADET_CXX_OPERATORS_NMS_OP_H_
#define SEETADET_CXX_OPERATORS_NMS_OP_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class NonMaxSuppressionOp final : public Operator<Context> {
public:
NonMaxSuppressionOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
iou_threshold_(OpArg<float>("iou_threshold", 0.5f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
protected:
float iou_threshold_;
};
} // namespace dragon
#endif // SEETADET_CXX_OPERATORS_NMS_OP_H_
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "retinanet_decoder_op.h"
namespace dragon {
template <class Context> template <typename T>
void RetinaNetDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
int feat_h, feat_w;
int C = Input(-3).dim(2), A, K;
int total_proposals = 0;
int num_candidates, num_boxes, num_proposals;
auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>();
auto* y = Output(0)->template mutable_data<BT, BC>();
for (int n = 0; n < num_images_; ++n) {
BT im_h = im_info[0];
BT im_w = im_info[1];
BT im_scale_h = im_info[2];
BT im_scale_w = im_info[2];
if (Input(-1).dim(1) == 4) im_scale_w = im_info[3];
auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0);
CHECK_EQ(strides_.size(), InputSize() - 3)
<< "\nGiven " << strides_.size() << " strides "
<< "and " << InputSize() - 3 << " features";
// Select the top-k candidates as proposals
num_boxes = Input(-3).dim(1);
num_candidates = Input(-3).count(1);
roi_indices_.resize(num_candidates);
num_candidates = 0;
for (int i = 0; i < roi_indices_.size(); ++i)
if (scores[i] > score_thr_)
roi_indices_[num_candidates++] = i;
scores_.resize(num_candidates);
for (int i = 0; i < num_candidates; ++i)
scores_[i] = scores[roi_indices_[i]];
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true,
scores_.data(),
indices_
);
for (int i = 0; i < num_proposals; ++i)
indices_[i] = roi_indices_[indices_[i]];
// Decode the candidates
int base_offset = 0;
for (int i = 0; i < strides_.size(); i++) {
feat_h = Input(i).dim(2);
feat_w = Input(i).dim(3);
K = feat_h * feat_w;
A = int(ratios_.size() * scales_.size());
anchors_.resize((size_t)(A * 4));
utils::detection::GenerateAnchors(
strides_[i],
(int)ratios_.size(),
(int)scales_.size(),
ratios_.data(),
scales_.data(),
anchors_.data()
);
utils::detection::GenerateGridAnchors(
num_proposals, C, A,
feat_h, feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
y
);
base_offset += (A * K);
}
utils::detection::GenerateMCProposals(
num_proposals,
num_boxes, C,
n,
im_h,
im_w,
im_scale_h,
im_scale_w,
scores,
deltas,
indices_.data(),
y
);
total_proposals += num_proposals;
y += (num_proposals * 7);
im_info += Input(-1).dim(1);
}
Output(0)->Reshape({ total_proposals, 7 });
}
template <class Context>
void RetinaNetDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_
<< " groups info, got "
<< Input(-1).dim(0) << ".";
Output(0)->Reshape({ num_images_ * pre_nms_topn_, 7 });
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
}
DEPLOY_CPU(RetinaNetDecoder);
#ifdef USE_CUDA
DEPLOY_CUDA(RetinaNetDecoder);
#endif
OPERATOR_SCHEMA(RetinaNetDecoder)
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
} // namespace dragon
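`GenerateMCProposals` turns each selected anchor plus its regression deltas into an image-space box. The transform is the standard Faster R-CNN box parameterization: `(dx, dy)` shift the anchor center, `(dw, dh)` rescale its width and height. A NumPy sketch of that math (the standard formulation; the operator's exact rounding and clipping may differ):
```python
import numpy as np

def decode_boxes(anchors, deltas):
    """anchors, deltas: (N, 4) arrays; boxes as (x1, y1, x2, y2)."""
    w = anchors[:, 2] - anchors[:, 0] + 1.0
    h = anchors[:, 3] - anchors[:, 1] + 1.0
    cx = anchors[:, 0] + 0.5 * w
    cy = anchors[:, 1] + 0.5 * h
    dx, dy, dw, dh = deltas.T
    pred_cx, pred_cy = dx * w + cx, dy * h + cy
    pred_w, pred_h = np.exp(dw) * w, np.exp(dh) * h
    return np.stack([pred_cx - 0.5 * pred_w, pred_cy - 0.5 * pred_h,
                     pred_cx + 0.5 * pred_w, pred_cy + 0.5 * pred_h], axis=1)

anchors = np.array([[0., 0., 31., 31.]])
deltas = np.array([[0.1, 0.0, 0.2, 0.0]])
print(decode_boxes(anchors, deltas))  # slightly shifted, ~22% wider box
```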
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
#define SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class RetinaNetDecoderOp final : public Operator<Context> {
public:
RetinaNetDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
score_thr_(OpArg<float>("score_thresh", 0.05f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
protected:
float score_thr_;
vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_;
int64_t num_images_, pre_nms_topn_;
};
} // namespace dragon
#endif // SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "rpn_decoder_op.h"
namespace dragon {
template <class Context> template <typename T>
void RPNDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
int feat_h, feat_w, K, A;
int total_rois = 0, num_rois;
int num_candidates, num_proposals;
auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>();
auto* y = Output(0)->template mutable_data<BT, BC>();
for (int n = 0; n < num_images_; ++n) {
const BT im_h = im_info[0];
const BT im_w = im_info[1];
const BT scale = im_info[2];
const BT min_box_h = min_size_ * scale;
const BT min_box_w = min_size_ * scale;
auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0);
if (strides_.size() == 1) {
// Case 1: single stride
feat_h = Input(0).dim(2);
feat_w = Input(0).dim(3);
K = feat_h * feat_w;
A = int(ratios_.size() * scales_.size());
// Select the Top-K candidates as proposals
num_candidates = A * K;
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true, scores, indices_
);
// Decode the candidates
anchors_.resize((size_t)(A * 4));
proposals_.Reshape({ num_proposals, 5 });
utils::detection::GenerateAnchors(
strides_[0],
(int)ratios_.size(),
(int)scales_.size(),
ratios_.data(),
scales_.data(),
anchors_.data()
);
utils::detection::GenerateGridAnchors(
num_proposals, A,
feat_h, feat_w,
strides_[0],
0,
anchors_.data(),
indices_.data(),
proposals_.template mutable_data<BT, BC>()
);
utils::detection::GenerateSSProposals(
K, num_proposals,
im_h, im_w,
min_box_h, min_box_w,
scores,
deltas,
indices_.data(),
proposals_.template mutable_data<BT, BC>()
);
// Sort, NMS and Retrieve
utils::detection::SortProposals(
0,
num_proposals - 1,
num_proposals,
proposals_.template mutable_data<BT, BC>()
);
utils::detection::ApplyNMS(
num_proposals,
post_nms_topn_,
nms_thr_,
proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(),
num_rois, ctx()
);
utils::detection::RetrieveRoIs(
num_rois,
n,
proposals_.template data<BT, BC>(),
roi_indices_.data(),
y
);
} else if (strides_.size() > 1) {
// Case 2: multiple strides
CHECK_EQ(strides_.size(), InputSize() - 3)
<< "\nGiven " << strides_.size() << " strides "
<< "and " << InputSize() - 3 << " feature inputs";
CHECK_EQ(strides_.size(), scales_.size())
<< "\nGiven " << strides_.size() << " strides "
<< "and " << scales_.size() << " scales";
// Select the top-k candidates as proposals
num_candidates = Input(-3).dim(1);
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true, scores, indices_
);
// Decode the candidates
int base_offset = 0;
proposals_.Reshape({ num_proposals, 5 });
auto* proposals = proposals_
.template mutable_data<BT, BC>();
for (int i = 0; i < strides_.size(); i++) {
feat_h = Input(i).dim(2);
feat_w = Input(i).dim(3);
K = feat_h * feat_w;
A = (int)ratios_.size();
anchors_.resize((size_t)(A * 4));
utils::detection::GenerateAnchors(
strides_[i],
(int)ratios_.size(),
1,
ratios_.data(),
scales_.data(),
anchors_.data()
);
utils::detection::GenerateGridAnchors(
num_proposals, A,
feat_h, feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
proposals
);
base_offset += (A * K);
}
utils::detection::GenerateMSProposals(
num_candidates,
num_proposals,
im_h, im_w,
min_box_h, min_box_w,
scores,
deltas,
&indices_[0],
proposals
);
// Sort, NMS and Retrieve
utils::detection::SortProposals(
0,
num_proposals - 1,
num_proposals,
proposals
);
utils::detection::ApplyNMS(
num_proposals,
post_nms_topn_,
nms_thr_,
proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(),
num_rois, ctx()
);
utils::detection::RetrieveRoIs(
num_rois,
n,
proposals,
roi_indices_.data(),
y
);
} else {
LOG(FATAL) << "Expected at least one stride for proposals.";
}
total_rois += num_rois;
y += (num_rois * 5);
im_info += Input(-1).dim(1);
}
Output(0)->Reshape({ total_rois, 5 });
// Distribute rois into K bins
if (OutputSize() > 1) {
CHECK_EQ(max_level_ - min_level_ + 1, OutputSize())
<< "\nExpected " << OutputSize() << " outputs for levels "
"between [" << min_level_ << ", " << max_level_ << "].";
vector<BT*> ys(OutputSize());
vector<vec64_t> bins(OutputSize());
Tensor RoIs; RoIs.ReshapeLike(*Output(0));
auto* rois = RoIs.template mutable_data<BT, BC>();
ctx()->template Copy<BT, BC, BC>(
Output(0)->count(),
rois, Output(0)->template data<BT, BC>()
);
utils::detection::CollectRoIs(
total_rois,
min_level_,
max_level_,
canonical_level_,
canonical_scale_,
rois, bins
);
for (int i = 0; i < OutputSize(); i++) {
Output(i)->Reshape({ std::max((int)bins[i].size(), 1), 5 });
ys[i] = Output(i)->template mutable_data<BT, BC>();
}
utils::detection::DistributeRoIs(bins, rois, ys);
}
}
template <class Context>
void RPNDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExpected " << num_images_
<< " groups of image info, got "
<< Input(-1).dim(0) << ".";
roi_indices_.resize(post_nms_topn_);
Output(0)->Reshape({ num_images_ * post_nms_topn_, 5 });
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
}
DEPLOY_CPU(RPNDecoder);
#ifdef USE_CUDA
DEPLOY_CUDA(RPNDecoder);
#endif
OPERATOR_SCHEMA(RPNDecoder)
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
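// Reader's note (not part of the op): inputs arrive as
// [feature_1, ..., feature_N, scores, deltas, im_info], and Output(0)
// holds the kept RoIs as rows of (batch_ind, x1, y1, x2, y2);
// extra outputs, when present, receive the RoIs of each FPN level.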
} // namespace dragon
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
#define SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class RPNDecoderOp final : public Operator<Context> {
public:
RPNDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
post_nms_topn_(OpArg<int64_t>("post_nms_top_n", 300)),
nms_thr_(OpArg<float>("nms_thresh", 0.7f)),
min_size_(OpArg<int64_t>("min_size", 16)),
min_level_(OpArg<int64_t>("min_level", 2)),
max_level_(OpArg<int64_t>("max_level", 5)),
canonical_level_(OpArg<int64_t>("canonical_level", 4)),
canonical_scale_(OpArg<int64_t>("canonical_scale", 224)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
protected:
float nms_thr_;
vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_;
int64_t min_size_, pre_nms_topn_, post_nms_topn_;
int64_t num_images_, min_level_, max_level_;
int64_t canonical_level_, canonical_scale_;
Tensor proposals_;
};
} // namespace dragon
#endif // SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Build cxx sources."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from distutils.core import setup
from dragon.tools import cpp_extension
if cpp_extension.CUDA_HOME is not None and \
cpp_extension._cuda.is_available():
Extension = cpp_extension.CUDAExtension
else:
Extension = cpp_extension.CppExtension
ext_modules = [
Extension(
name='install.lib.modules._C',
sources=[
'utils/detection_utils.cc',
'utils/detection_utils.cu',
'operators/nms_op.cc',
'operators/retinanet_decoder_op.cc',
'operators/rpn_decoder_op.cc',
],
),
]
setup(
name='SeetaDet',
ext_modules=ext_modules,
cmdclass={'build_ext': cpp_extension.BuildExtension}
)
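# A typical build invocation (an assumption, mirroring standard
# distutils usage):
#   python setup.py build_ext --inplace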
#include <dragon/core/context.h>
#include "detection_utils.h"
namespace dragon {
namespace utils {
namespace detection {
template <typename T>
T IoU(const T A[], const T B[]) {
if (A[0] > B[2] || A[1] > B[3] ||
A[2] < B[0] || A[3] < B[1]) return 0;
const T x1 = std::max(A[0], B[0]);
const T y1 = std::max(A[1], B[1]);
const T x2 = std::min(A[2], B[2]);
const T y2 = std::min(A[3], B[3]);
const T width = std::max((T)0, x2 - x1 + 1);
const T height = std::max((T)0, y2 - y1 + 1);
const T area = width * height;
const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1);
const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1);
return area / (A_area + B_area - area);
}
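// A quick numeric check (comment only): for the 10x10 boxes
// A = {0, 0, 9, 9} and B = {5, 5, 14, 14}, the intersection is
// 5 * 5 = 25 and the union is 100 + 100 - 25 = 175, so
// IoU(A, B) = 25 / 175 ~= 0.143 under this +1 (integer pixel) convention.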
template <> void ApplyNMS<float, CPUContext>(
const int num_boxes,
const int max_keeps,
const float thresh,
const float* boxes,
int64_t* keep_indices,
int& num_keep,
CPUContext* ctx) {
int count = 0;
std::vector<char> is_dead(num_boxes);
for (int i = 0; i < num_boxes; ++i) is_dead[i] = 0;
for (int i = 0; i < num_boxes; ++i) {
if (is_dead[i]) continue;
keep_indices[count++] = i;
if (count == max_keeps) break;
for (int j = i + 1; j < num_boxes; ++j)
if (!is_dead[j] && IoU(&boxes[i * 5],
&boxes[j * 5]) > thresh)
is_dead[j] = 1;
}
num_keep = count;
}
} // namespace detection
} // namespace utils
} // namespace dragon
#ifdef USE_CUDA
#include <dragon/core/context_cuda.h>
#include "detection_utils.h"
namespace dragon {
namespace utils {
namespace detection {
#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define NUM_THREADS 64
namespace {
template <typename T>
__device__ bool _CheckIoU(
const T* a,
const T* b,
const float thresh) {
const T x1 = max(a[0], b[0]);
const T y1 = max(a[1], b[1]);
const T x2 = min(a[2], b[2]);
const T y2 = min(a[3], b[3]);
const T width = max(T(0), x2 - x1 + 1);
const T height = max(T(0), y2 - y1 + 1);
const T inter = width * height;
const T Sa = (a[2] - a[0] + T(1)) * (a[3] - a[1] + T(1));
const T Sb = (b[2] - b[0] + T(1)) * (b[3] - b[1] + T(1));
return inter > thresh * (Sa + Sb - inter);
}
template <typename T>
__global__ void _NonMaxSuppression(
const int num_blocks,
const int num_boxes,
const T thresh,
const T* dev_boxes,
uint64_t* dev_mask) {
const int row_start = blockIdx.y;
const int col_start = blockIdx.x;
if (row_start > col_start) return;
const int row_size = min(num_boxes - row_start * NUM_THREADS, NUM_THREADS);
const int col_size = min(num_boxes - col_start * NUM_THREADS, NUM_THREADS);
__shared__ T block_boxes[NUM_THREADS * 4];
if (threadIdx.x < col_size) {
const int c1 = threadIdx.x * 4;
const int c2 = (col_start * NUM_THREADS + threadIdx.x) * 5;
block_boxes[c1] = dev_boxes[c2];
block_boxes[c1 + 1] = dev_boxes[c2 + 1];
block_boxes[c1 + 2] = dev_boxes[c2 + 2];
block_boxes[c1 + 3] = dev_boxes[c2 + 3];
}
__syncthreads();
if (threadIdx.x < row_size) {
const int index = row_start * NUM_THREADS + threadIdx.x;
const T* dev_box = dev_boxes + index * 5;
unsigned long long val = 0;
const int start = (row_start == col_start) ? (threadIdx.x + 1) : 0;
for (int i = start; i < col_size; ++i) {
if (_CheckIoU(dev_box, block_boxes + i * 4, thresh)) {
val |= 1ULL << i;
}
}
dev_mask[index * num_blocks + col_start] = val;
}
}
} // namespace
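// How the pieces fit (comment only): the kernel above tiles boxes into
// 64-wide blocks; block (r, c) lets each thread of row-block r test its
// box against the 64 boxes of column-block c, packing the "suppressed"
// bits into one uint64 per (box, column-block) pair. The host loop below
// then walks boxes in score order and keeps a box only if no previously
// kept box has set its bit.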
template <> void ApplyNMS<float, CUDAContext>(
const int num_boxes,
const int max_keeps,
const float thresh,
const float* boxes,
int64_t* keep_indices,
int& num_keep,
CUDAContext* ctx) {
const int num_blocks = DIV_UP(num_boxes, NUM_THREADS);
vector<uint64_t> mask_host(num_boxes * num_blocks);
auto* mask_dev = (uint64_t*)ctx->New(mask_host.size() * sizeof(uint64_t));
_NonMaxSuppression
<<< dim3(num_blocks, num_blocks), NUM_THREADS,
0, ctx->cuda_stream() >>>(
num_blocks,
num_boxes,
thresh,
boxes,
mask_dev
);
CUDA_CHECK(cudaMemcpyAsync(
mask_host.data(),
mask_dev,
mask_host.size() * sizeof(uint64_t),
cudaMemcpyDeviceToHost,
ctx->cuda_stream()
));
ctx->FinishDeviceComputation();
vector<uint64_t> dead_bit(num_blocks);
memset(&dead_bit[0], 0, sizeof(uint64_t) * num_blocks);
int num_selected = 0;
for (int i = 0; i < num_boxes; ++i) {
const int nblock = i / NUM_THREADS;
const int inblock = i % NUM_THREADS;
if (!(dead_bit[nblock] & (1ULL << inblock))) {
keep_indices[num_selected++] = i;
auto* mask_i = &mask_host[0] + i * num_blocks;
for (int j = nblock; j < num_blocks; ++j) dead_bit[j] |= mask_i[j];
if (num_selected == max_keeps) break;
}
}
num_keep = num_selected;
ctx->Delete(mask_dev);
}
} // namespace detection
} // namespace utils
} // namespace dragon
#endif // USE_CUDA
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef SEETADET_CXX_UTILS_DETECTION_UTILS_H_
#define SEETADET_CXX_UTILS_DETECTION_UTILS_H_
#include "dragon/core/context.h"
#include "dragon/core/operator.h"
namespace dragon {
namespace utils {
namespace detection {
#define ROUND(x) ((int)((x) + (T)0.5))
/******************** BBox ********************/
template <typename T>
inline int FilterBoxes(
const T dx,
const T dy,
const T d_log_w,
const T d_log_h,
const T im_w,
const T im_h,
const T min_box_w,
const T min_box_h,
T* bbox) {
const T w = bbox[2] - bbox[0] + 1;
const T h = bbox[3] - bbox[1] + 1;
const T ctr_x = bbox[0] + (T)0.5 * w;
const T ctr_y = bbox[1] + (T)0.5 * h;
const T pred_ctr_x = dx * w + ctr_x;
const T pred_ctr_y = dy * h + ctr_y;
const T pred_w = exp(d_log_w) * w;
const T pred_h = exp(d_log_h) * h;
bbox[0] = pred_ctr_x - (T)0.5 * pred_w;
bbox[1] = pred_ctr_y - (T)0.5 * pred_h;
bbox[2] = pred_ctr_x + (T)0.5 * pred_w;
bbox[3] = pred_ctr_y + (T)0.5 * pred_h;
bbox[0] = std::max((T)0, std::min(bbox[0], im_w - 1));
bbox[1] = std::max((T)0, std::min(bbox[1], im_h - 1));
bbox[2] = std::max((T)0, std::min(bbox[2], im_w - 1));
bbox[3] = std::max((T)0, std::min(bbox[3], im_h - 1));
const T bbox_w = bbox[2] - bbox[0] + 1;
const T bbox_h = bbox[3] - bbox[1] + 1;
return (bbox_w >= min_box_w) * (bbox_h >= min_box_h);
}
template <typename T>
inline void BBoxTransform(
const T dx,
const T dy,
const T d_log_w,
const T d_log_h,
const T im_w,
const T im_h,
const T im_scale_h,
const T im_scale_w,
T* bbox) {
const T w = bbox[2] - bbox[0] + 1;
const T h = bbox[3] - bbox[1] + 1;
const T ctr_x = bbox[0] + (T)0.5 * w;
const T ctr_y = bbox[1] + (T)0.5 * h;
const T pred_ctr_x = dx * w + ctr_x;
const T pred_ctr_y = dy * h + ctr_y;
const T pred_w = exp(d_log_w) * w;
const T pred_h = exp(d_log_h) * h;
bbox[0] = pred_ctr_x - (T)0.5 * pred_w;
bbox[1] = pred_ctr_y - (T)0.5 * pred_h;
bbox[2] = pred_ctr_x + (T)0.5 * pred_w;
bbox[3] = pred_ctr_y + (T)0.5 * pred_h;
bbox[0] = std::max((T)0, std::min(bbox[0], im_w - 1)) / im_scale_w;
bbox[1] = std::max((T)0, std::min(bbox[1], im_h - 1)) / im_scale_h;
bbox[2] = std::max((T)0, std::min(bbox[2], im_w - 1)) / im_scale_w;
bbox[3] = std::max((T)0, std::min(bbox[3], im_h - 1)) / im_scale_h;
}
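// Both routines above decode deltas with the standard R-CNN
// parameterization:
//   cx' = dx * w + cx,  cy' = dy * h + cy,
//   w'  = exp(d_log_w) * w,  h' = exp(d_log_h) * h,
// then clip to the image. BBoxTransform additionally maps the clipped
// box back to the original resolution via im_scale_h / im_scale_w.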
/******************** Anchor ********************/
template <typename T>
inline void GenerateAnchors(
int base_size,
const int num_ratios,
const int num_scales,
const T* ratios,
const T* scales,
T* anchors) {
const T base_area = (T)(base_size * base_size);
const T center = (T)0.5 * (base_size - (T)1);
T* offset_anchors = anchors;
for (int i = 0; i < num_ratios; ++i) {
const T ratio_w = (T)ROUND(sqrt(base_area / ratios[i]));
const T ratio_h = (T)ROUND(ratio_w * ratios[i]);
for (int j = 0; j < num_scales; ++j) {
const T scale_w = (T)0.5 * (ratio_w * scales[j] - (T)1);
const T scale_h = (T)0.5 * (ratio_h * scales[j] - (T)1);
offset_anchors[0] = center - scale_w;
offset_anchors[1] = center - scale_h;
offset_anchors[2] = center + scale_w;
offset_anchors[3] = center + scale_h;
offset_anchors += 4;
}
}
}
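// Worked example (comment only): base_size=16, ratio=0.5, scale=8 gives
//   base_area = 256, center = 7.5,
//   ratio_w = ROUND(sqrt(256 / 0.5)) = 23, ratio_h = ROUND(23 * 0.5) = 12,
//   scale_w = 0.5 * (23 * 8 - 1) = 91.5, scale_h = 0.5 * (12 * 8 - 1) = 47.5,
// i.e. the familiar Faster R-CNN anchor (-84, -40, 99, 55).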
template <typename T>
inline void GenerateGridAnchors(
const int num_proposals,
const int num_anchors,
const int feat_h,
const int feat_w,
const int stride,
const int base_offset,
const T* anchors,
const int64_t* indices,
T* proposals) {
T x, y;
int idx_3d, a, h, w;
int idx_range = num_anchors * feat_h * feat_w;
for (int i = 0; i < num_proposals; ++i) {
idx_3d = (int)indices[i] - base_offset;
if (idx_3d >= 0 && idx_3d < idx_range) {
w = idx_3d % feat_w;
h = (idx_3d / feat_w) % feat_h;
a = idx_3d / feat_w / feat_h;
x = (T)w * stride, y = (T)h * stride;
auto* A = anchors + a * 4;
auto* P = proposals + i * 5;
P[0] = x + A[0], P[1] = y + A[1];
P[2] = x + A[2], P[3] = y + A[3];
}
}
}
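// Note: `indices` hold offsets flattened over (A, feat_h, feat_w), so an
// index decodes as a * (feat_h * feat_w) + h * feat_w + w. `base_offset`
// shifts the window when several strides share one global index space,
// as in the multi-stride branch of the RPN decoder.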
template <typename T>
inline void GenerateGridAnchors(
const int num_proposals,
const int num_classes,
const int num_anchors,
const int feat_h,
const int feat_w,
const int stride,
const int base_offset,
const T* anchors,
const int64_t* indices,
T* proposals) {
T x, y;
int idx_4d, a, h, w;
int lr = num_classes * base_offset;
int rr = num_classes * (num_anchors * feat_h * feat_w);
for (int i = 0; i < num_proposals; ++i) {
idx_4d = (int)indices[i] - lr;
if (idx_4d >= 0 && idx_4d < rr) {
idx_4d /= num_classes;
w = idx_4d % feat_w;
h = (idx_4d / feat_w) % feat_h;
a = idx_4d / feat_w / feat_h;
x = (T)w * stride, y = (T)h * stride;
auto* A = anchors + a * 4;
auto* P = proposals + i * 7 + 1;
P[0] = x + A[0], P[1] = y + A[1];
P[2] = x + A[2], P[3] = y + A[3];
}
}
}
/******************** Proposal ********************/
template <typename T>
void GenerateSSProposals(
const int K,
const int num_proposals,
const float im_h,
const float im_w,
const float min_box_h,
const float min_box_w,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index, a, k;
const float* delta;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
index = indices[i];
a = index / K, k = index % K;
delta = deltas + k;
dx = delta[(a * 4 + 0) * K];
dy = delta[(a * 4 + 1) * K];
d_log_w = delta[(a * 4 + 2) * K];
d_log_h = delta[(a * 4 + 3) * K];
proposal[4] = FilterBoxes(
dx, dy,
d_log_w, d_log_h,
im_w, im_h,
min_box_w, min_box_h,
proposal
) * scores[index];
proposal += 5;
}
}
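// Layout assumed by GenerateSSProposals: scores are (A, K) and deltas are
// (A * 4, K) with K = feat_h * feat_w, so index / K selects the anchor
// and index % K the spatial cell. FilterBoxes returns 0 or 1, so the
// score of a too-small decoded box is zeroed out.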
template <typename T>
void GenerateMSProposals(
const int num_candidates,
const int num_proposals,
const float im_h,
const float im_w,
const float min_box_h,
const float min_box_w,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index;
int64_t num_candidates_2x = 2 * num_candidates;
int64_t num_candidates_3x = 3 * num_candidates;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
index = indices[i];
dx = deltas[index];
dy = deltas[num_candidates + index];
d_log_w = deltas[num_candidates_2x + index];
d_log_h = deltas[num_candidates_3x + index];
proposal[4] = FilterBoxes(
dx, dy,
d_log_w, d_log_h,
im_w, im_h,
min_box_w, min_box_h,
proposal
) * scores[index];
proposal += 5;
}
}
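// Unlike the single-stride case, deltas here are laid out as
// (4, num_candidates): dx, dy, d_log_w and d_log_h occupy four contiguous
// planes indexed by the same flattened candidate index as the scores.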
template <typename T>
void GenerateMCProposals(
const int num_proposals,
const int num_boxes,
const int num_classes,
const int im_idx,
const float im_h,
const float im_w,
const float im_scale_h,
const float im_scale_w,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index, cls;
int64_t num_boxes_2x = 2 * num_boxes;
int64_t num_boxes_3x = 3 * num_boxes;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
cls = indices[i] % num_classes;
index = indices[i] / num_classes;
dx = deltas[index];
dy = deltas[num_boxes + index];
d_log_w = deltas[num_boxes_2x + index];
d_log_h = deltas[num_boxes_3x + index];
proposal[0] = im_idx;
BBoxTransform(
dx, dy,
d_log_w, d_log_h,
im_w, im_h,
im_scale_h, im_scale_w,
proposal + 1
);
proposal[5] = scores[indices[i]];
proposal[6] = cls + 1;
proposal += 7;
}
}
template <typename T>
inline void SortProposals(
const int start,
const int end,
const int num_top,
T* proposals) {
const T pivot_score = proposals[start * 5 + 4];
int left = start + 1, right = end;
while (left <= right) {
while (left <= end && proposals[left * 5 + 4] >= pivot_score) ++left;
while (right > start && proposals[right * 5 + 4] <= pivot_score) --right;
if (left <= right) {
for (int i = 0; i < 5; ++i)
std::swap(proposals[left * 5 + i], proposals[right * 5 + i]);
++left;
--right;
}
}
if (right > start) {
for (int i = 0; i < 5; ++i)
std::swap(proposals[start * 5 + i], proposals[right * 5 + i]);
}
if (start < right - 1) SortProposals(start, right - 1, num_top, proposals);
if (right + 1 < num_top && right + 1 < end)
SortProposals(right + 1, end, num_top, proposals);
}
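// SortProposals is a partial quicksort on the score field (column 4):
// the right branch only recurses while it can still affect the first
// `num_top` rows, so just the top segment ends up fully ordered, which
// is all NMS needs.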
template <typename T>
inline void RetrieveRoIs(
const int num_rois,
const int roi_batch_ind,
const T* proposals,
const int64_t* roi_indices,
T* rois) {
for (int i = 0; i < num_rois; ++i) {
const T* proposal = proposals + roi_indices[i] * 5;
rois[i * 5 + 0] = (T)roi_batch_ind;
rois[i * 5 + 1] = proposal[0];
rois[i * 5 + 2] = proposal[1];
rois[i * 5 + 3] = proposal[2];
rois[i * 5 + 4] = proposal[3];
}
}
template <typename T>
inline int roi_level(
const int min_level,
const int max_level,
const int canonical_level,
const int canonical_scale,
T* roi) {
T w = roi[3] - roi[1] + 1;
T h = roi[4] - roi[2] + 1;
// Follow the level assignment of the FPN paper
int level = canonical_level + std::log2(
std::max(std::sqrt(w * h), (T)1) / (T)canonical_scale);
return std::min(max_level, std::max(min_level, level));
}
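// This is Eq. (1) of the FPN paper: level = k0 + log2(sqrt(w * h) / s0),
// truncated to int. With the defaults k0 = 4 and s0 = 224, a 224x224 RoI
// maps to level 4 and a 112x112 RoI to level 3, clamped to
// [min_level, max_level].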
template <typename T>
inline void CollectRoIs(
const int num_rois,
const int min_level,
const int max_level,
const int canonical_level,
const int canonical_scale,
const T* rois,
vector<vec64_t>& roi_bins) {
const T* roi = rois;
for (int i = 0; i < num_rois; ++i) {
int bin_idx = roi_level(min_level, max_level,
canonical_level, canonical_scale, roi);
bin_idx = std::max(bin_idx - min_level, 0);
roi_bins[bin_idx].push_back(i);
roi += 5;
}
}
template <typename T>
inline void DistributeRoIs(
const vector<vec64_t>& roi_bins,
const T* rois,
vector<T*> outputs) {
for (int i = 0; i < roi_bins.size(); i++) {
auto* y = outputs[i];
if (roi_bins[i].size() == 0) {
// Fake a tiny roi to avoid empty roi pooling
y[0] = 0, y[1] = 0, y[2] = 0, y[3] = 1, y[4] = 1;
} else {
for (int j = 0; j < roi_bins[i].size(); ++j) {
const T* roi = rois + roi_bins[i][j] * 5;
for (int k = 0; k < 5; ++k) y[k] = roi[k];
y += 5;
}
}
}
}
/******************** NMS ********************/
template <typename T, class Context>
void ApplyNMS(
const int num_boxes,
const int max_keeps,
const T thresh,
const T* boxes,
int64_t* keep_indices,
int& num_keep,
Context* ctx);
} // namespace detection
} // namespace utils
} // namespace dragon
#endif // SEETADET_CXX_UTILS_DETECTION_UTILS_H_
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------
cimport cython
import numpy as np
cimport numpy as np
DTYPE = np.float
ctypedef np.float_t DTYPE_t
@cython.boundscheck(False)
def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    with nogil:
        for k in range(K):
            box_area = (
                (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
                (query_boxes[k, 3] - query_boxes[k, 1] + 1)
            )
            for n in range(N):
                iw = (
                    min(boxes[n, 2], query_boxes[k, 2]) -
                    max(boxes[n, 0], query_boxes[k, 0]) + 1
                )
                if iw > 0:
                    ih = (
                        min(boxes[n, 3], query_boxes[k, 3]) -
                        max(boxes[n, 1], query_boxes[k, 1]) + 1
                    )
                    if ih > 0:
                        ua = float(
                            (boxes[n, 2] - boxes[n, 0] + 1) *
                            (boxes[n, 3] - boxes[n, 1] + 1) +
                            box_area - iw * ih
                        )
                        overlaps[n, k] = iw * ih / ua
    return overlaps
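# A usage sketch (assuming the extension is importable as
# lib.utils.cython_bbox after building):
#   import numpy as np
#   from lib.utils import cython_bbox
#   boxes = np.array([[0., 0., 9., 9.]])
#   query = np.array([[0., 0., 9., 9.], [5., 5., 14., 14.]])
#   cython_bbox.bbox_overlaps(boxes, query)  # -> [[1.0, 0.1428...]]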
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Compile the cython extensions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from distutils.extension import Extension
from distutils.core import setup
import os
from Cython.Distutils import build_ext
import numpy as np
ext_modules = [
Extension(
'install.lib.utils.cython_bbox',
['cython_bbox.pyx'],
extra_compile_args=['-w'],
include_dirs=[np.get_include()]
),
Extension(
'install.lib.utils.cython_nms',
['cython_nms.pyx'],
extra_compile_args=['-w'],
include_dirs=[np.get_include()]
),
Extension(
'install.lib.pycocotools._mask',
['maskApi.c', '_mask.pyx'],
include_dirs=[np.get_include(), os.path.dirname(os.path.abspath(__file__))],
extra_compile_args=['-w']
),
]
setup(
name='SeetaDet',
ext_modules=ext_modules,
cmdclass={'build_ext': build_ext},
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.data_transformer import DataTransformer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from lib.utils.blob import im_list_to_blob
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : lambda
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
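        # Resulting topology (for orientation):
        #   readers -> Q1 -> transformers -> Q21 / Q22 -> this process -> Q3
        # Q21 and Q22 keep the two aspect groups apart (see run() below).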
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
def produce(q_in):
processed_ims, ims_info, all_boxes = [], [], []
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[:2]) + [im_scale])
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
all_boxes.append(im_boxes)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, dtype=np.float32),
'gt_boxes': np.concatenate(all_boxes, axis=0),
}
# Two queues to implement aspect-grouping
# This is necessary to reduce the gpu memory
# from fetching a huge square batch blob
q1, q2 = self.Q21, self.Q22
# Main prefetch loop
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
q1, q2 = q2, q1 # Uniform sampling trick
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.mask_rcnn.data_transformer import DataTransformer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import mask_list_to_blob
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : lambda
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
def produce(q_in):
processed_ims, ims_info = [], []
packed_boxes, packed_masks = [], []
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes, gt_masks = q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[:2]) + [im_scale])
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
packed_boxes.append(im_boxes)
packed_masks.append(gt_masks)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, 'float32'),
'gt_boxes': np.concatenate(packed_boxes, 0),
'gt_masks': mask_list_to_blob(packed_masks),
}
# Two queues to implement aspect-grouping
# This is necessary to reduce the gpu memory
# from fetching a huge square batch blob
q1, q2 = self.Q21, self.Q22
# Main prefetch loop
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
q1, q2 = q2, q1 # Uniform sampling trick
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import importlib
_STORE = collections.defaultdict(dict)
###########################################
# #
# Body #
# #
###########################################
# ResNet
for D in [18, 34, 50, 101, 152, 200, 269]:
_STORE['BODY']['resnet{}'.format(D)] = \
'lib.modeling.resnet.make_resnet_{}'.format(D)
# VGG
for D in [16, 19]:
for T in ['', '_reduced_300', '_reduced_512']:
_STORE['BODY']['vgg{}{}'.format(D, T)] = \
'lib.modeling.vgg.make_vgg_{}{}'.format(D, T)
# AirNet
for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D)
# MobileNet
for D in ['a1', 'v2']:
_STORE['BODY']['mobilenet_{}'.format(D)] = \
'lib.modeling.mobilenet.make_mobilenet_{}'.format(D)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'
.format(name, desc, ', '.join(sets.keys()))
)
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
    except ImportError:
        raise ValueError('Cannot import module: ' + module_name)
def get_body_func(name):
return get_template_func(
name, _STORE['BODY'], 'Body')
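# For example (comment only), get_body_func('resnet50') imports
# lib.modeling.resnet and returns its make_resnet_50 function, since the
# registry maps 'resnet50' to 'lib.modeling.resnet.make_resnet_50'.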
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch import nn
from lib.core.config import cfg
class Affine(object):
"""Affine transformation with weight and bias fixed."""
def __new__(cls, dim_in, bias=True, inplace=True):
return nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
class Conv1x1(object):
"""1x1 convolution."""
def __new__(cls, dim_in, dim_out, stride=1, bias=False):
return nn.Conv2d(
dim_in,
dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
class Conv3x3(object):
"""3x3 convolution."""
def __new__(cls, dim_in, dim_out, stride=1, dilation=1, bias=False):
return nn.Conv2d(
dim_in,
dim_out,
kernel_size=3,
stride=stride,
padding=1 * dilation,
bias=bias,
)
class CrossEntropyLoss(object):
"""Cross entropy loss."""
def __new__(cls):
return nn.CrossEntropyLoss(ignore_index=-1)
class Identity(nn.Module):
"""Pass input to the output."""
def __init__(self, *args, **kwargs):
super(Identity, self).__init__()
_, _ = args, kwargs
def forward(self, x):
return x
class SigmoidFocalLoss(object):
"""Sigmoid focal loss."""
def __new__(cls):
return nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
)
class SmoothL1Loss(object):
"""Smoothed l1 loss."""
def __new__(cls, beta=1.):
return nn.SmoothL1Loss(
beta=beta,
reduction='batch_size',
)
def is_conv2d(module):
"""Return a bool indicating the module is a Conv2d."""
return isinstance(module, nn.Conv2d) or \
isinstance(module, nn.DepthwiseConv2d)
AvgPool2d = nn.AvgPool2d
BatchNorm2d = nn.BatchNorm2d
BCEWithLogitsLoss = nn.BCEWithLogitsLoss
Conv2d = nn.Conv2d
ConvTranspose2d = nn.ConvTranspose2d
DepthwiseConv2d = nn.DepthwiseConv2d
Linear = nn.Linear
MaxPool2d = nn.MaxPool2d
Module = nn.Module
ModuleList = nn.ModuleList
Sequential = nn.Sequential
ReLU = nn.ReLU
Sigmoid = nn.Sigmoid
Softmax = nn.Softmax
Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies,
either expressed or implied, of the FreeBSD Project.
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.ssd.data_transformer import DataTransformer
from lib.utils import logger
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : lambda
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 32)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q2 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in, transformer.q_out = self.Q1, self.Q2
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
image_batch_shape = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
# Main prefetch loop
while True:
boxes_to_pack = []
img, gt_boxes = self.Q2.get()
ims_blob = np.zeros(image_batch_shape, img.dtype)
for i in range(cfg.TRAIN.IMS_PER_BATCH):
ims_blob[i] = img
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
boxes[:, :gt_boxes.shape[1]], boxes[:, -1] = gt_boxes, i
boxes_to_pack.append(boxes)
if i != (cfg.TRAIN.IMS_PER_BATCH - 1):
img, gt_boxes = self.Q2.get()
self.Q3.put({
'data': ims_blob,
'gt_boxes': np.concatenate(boxes_to_pack),
})
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/ppwwyyxx/tensorpack/blob/master/examples/FasterRCNN/utils/np_box_ops.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from lib.utils import cython_bbox
def intersection(boxes1, boxes2):
"""Compute pairwise intersection areas between boxes.
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes
boxes2: a numpy array with shape [M, 4] holding M boxes
Returns:
    a numpy array with shape [N, M] representing pairwise intersection areas
"""
[y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
[y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)
all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
intersect_heights = np.maximum(
np.zeros(all_pairs_max_ymin.shape),
all_pairs_min_ymax - all_pairs_max_ymin)
all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
intersect_widths = np.maximum(
np.zeros(all_pairs_max_xmin.shape),
all_pairs_min_xmax - all_pairs_max_xmin)
return intersect_heights * intersect_widths
def iou(boxes1, boxes2):
"""Computes pairwise intersection-over-union between box collections.
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise iou scores.
"""
intersect = intersection(boxes1, boxes2)
area1 = boxes_area(boxes1)
area2 = boxes_area(boxes2)
union = \
np.expand_dims(area1, axis=1) + \
np.expand_dims(area2, axis=0) - intersect
return intersect / union
def ioa1(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections.
Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
their intersection area over box2's area. Note that ioa is not symmetric,
that is, IOA(box1, box2) != IOA(box2, box1).
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
    boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(boxes_area(boxes1), axis=1)
return intersect / areas
def ioa2(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections.
Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
their intersection area over box2's area. Note that ioa is not symmetric,
that is, IOA(box1, box2) != IOA(box2, box1).
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
    boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(boxes_area(boxes2), axis=0)
return intersect / areas
def bbox_overlaps(boxes1, boxes2):
"""Compute the overlaps between two group of boxes."""
return cython_bbox.bbox_overlaps(
np.ascontiguousarray(boxes1, dtype=np.float),
np.ascontiguousarray(boxes2, dtype=np.float),
)
def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1.)):
"""Transform the boxes to the regression targets."""
ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.
ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.
ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.
gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.
gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
wx, wy, ww, wh = weights
targets = [wx * (gt_ctr_x - ex_ctr_x) / ex_widths]
targets += [wy * (gt_ctr_y - ex_ctr_y) / ex_heights]
targets += [ww * np.log(gt_widths / ex_widths)]
targets += [wh * np.log(gt_heights / ex_heights)]
return np.vstack(targets).transpose()
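# Sanity check (comment only): when ex_rois == gt_rois, the center offsets
# and log size ratios all cancel, so bbox_transform returns zeros; and
# bbox_transform_inv below inverts it, i.e.
# bbox_transform_inv(b, bbox_transform(b, g)) recovers g up to float error.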
def bbox_transform_inv(boxes, deltas, weights=(1., 1., 1., 1.)):
"""Decode the final boxes according to the deltas."""
if boxes.shape[0] == 0:
return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
boxes = boxes.astype(deltas.dtype, copy=False)
widths = boxes[:, 2] - boxes[:, 0] + 1.
heights = boxes[:, 3] - boxes[:, 1] + 1.
ctr_x = boxes[:, 0] + 0.5 * widths
ctr_y = boxes[:, 1] + 0.5 * heights
wx, wy, ww, wh = weights
dx = deltas[:, 0::4] / wx
dy = deltas[:, 1::4] / wy
dw = deltas[:, 2::4] / ww
dh = deltas[:, 3::4] / wh
pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
pred_w = np.exp(dw) * widths[:, np.newaxis]
pred_h = np.exp(dh) * heights[:, np.newaxis]
pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # x1
pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # y1
pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w # x2
pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h # y2
return pred_boxes
def boxes_area(boxes):
"""Compute the area of an array of boxes."""
w = (boxes[:, 2] - boxes[:, 0] + 1)
h = (boxes[:, 3] - boxes[:, 1] + 1)
areas = w * h
    assert np.all(areas >= 0), 'Negative areas found'
return areas
def clip_boxes(boxes, im_shape):
# x1 >= 0
boxes[:, 0] = np.maximum(np.minimum(boxes[:, 0], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1] = np.maximum(np.minimum(boxes[:, 1], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2] = np.maximum(np.minimum(boxes[:, 2], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3] = np.maximum(np.minimum(boxes[:, 3], im_shape[0] - 1), 0)
return boxes
def clip_tiled_boxes(boxes, im_shape):
# x1 >= 0
boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
return boxes
def expand_boxes(boxes, scale):
"""Expand an array of boxes by a given scale."""
w_half = (boxes[:, 2] - boxes[:, 0]) * .5
h_half = (boxes[:, 3] - boxes[:, 1]) * .5
x_c = (boxes[:, 2] + boxes[:, 0]) * .5
y_c = (boxes[:, 3] + boxes[:, 1]) * .5
w_half *= scale
h_half *= scale
boxes_exp = np.zeros(boxes.shape)
boxes_exp[:, 0] = x_c - w_half
boxes_exp[:, 2] = x_c + w_half
boxes_exp[:, 1] = y_c - h_half
boxes_exp[:, 3] = y_c + h_half
return boxes_exp
def flip_boxes(boxes, width):
"""Flip the boxes horizontally."""
    flipped = boxes.copy()
    old_x1 = boxes[:, 0].copy()
    old_x2 = boxes[:, 2].copy()
    flipped[:, 0] = width - old_x2 - 1
    flipped[:, 2] = width - old_x1 - 1
    return flipped
def filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min size."""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
def dismantle_boxes(gt_boxes, num_images):
"""Dismantle the packed ground-truth boxes."""
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == i)[0]
][:, :-1] for i in range(num_images)
]
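# E.g. (comment only): with packed gt_boxes whose last column is the image
# index, dismantle_boxes(gt_boxes, 2) returns [boxes_of_image_0,
# boxes_of_image_1], each without the trailing index column.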
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon
from dragon.core.framework import tensor_util
from dragon.core.util import six
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
def feed_tensor(tensor, array):
tensor_util.set_array(tensor, array)
def get_param_groups(module, bias_lr=1., bias_decay=0.):
"""Separate weight and bias into parameters groups.
Parameters
----------
module : dragon.vm.torch.nn.Module
The module to collect parameters.
bias_lr : float, optional, default=1.
The lr multiplier of bias.
bias_decay : float, optional, default=0.
The decay multiplier of bias.
Returns
-------
Sequence[ParamGroup]
The parameter groups.
"""
param_groups = [
{
'params': [],
'lr_mult': 1.,
'decay_mult': 1.,
},
{
'params': [],
'lr_mult': bias_lr,
'decay_mult': bias_decay,
}
]
for name, param in module.named_parameters():
gi = 0 if 'weight' in name and param.dim() > 1 else 1
param_groups[gi]['params'].append(param)
if len(param_groups[1]['params']) == 0:
param_groups.pop() # Remove empty group
return param_groups
def get_workspace():
"""Return the current default workspace.
Returns
-------
dragon.Workspace
The default workspace.
"""
return dragon.get_workspace()
def new_placeholder(device=None):
"""Create a new tensor to feed data.
Parameters
----------
device : int, optional
The device index.
Returns
-------
dragon.vm.torch.Tensor
The placeholder tensor.
"""
value = torch.zeros(1)
if device is not None:
return value.cuda(device)
return value
def new_tensor(data, enforce_cpu=False):
"""Create a new tensor from the data.
Parameters
----------
data : array_like
The data value.
enforce_cpu : bool, optional, default=False
**True** to enforce the cpu storage.
Returns
-------
dragon.vm.torch.Tensor
The tensor taken with the data.
"""
if isinstance(data, np.ndarray):
tensor = torch.from_numpy(data)
elif isinstance(data, torch.Tensor):
tensor = data
else:
tensor = torch.tensor(data)
if not enforce_cpu:
tensor = tensor.cuda(cfg.GPU_ID)
return tensor
def new_workspace(merge_default=True):
"""Create a new workspace.
Parameters
----------
merge_default : bool, optional, default=True
**True** to merge tensors from default workspace.
Returns
-------
dragon.Workspace
The new workspace.
"""
workspace = dragon.Workspace()
if merge_default:
workspace.merge_from(get_workspace())
return workspace
def reset_workspace(workspace=None, merge_default=True):
"""Reset a workspace and return a new one.
Parameters
----------
workspace : dragon.Workspace, optional
The workspace to reset.
merge_default : bool, optional, default=True
**True** to merge tensors from default workspace.
Returns
-------
dragon.Workspace
The new workspace.
"""
if workspace is not None:
workspace.Clear() # Block the GIL
return new_workspace(merge_default)
class Graph(object):
"""Simple sequential graph to accelerate inference.
    Graph reduces the per-call overhead of python functions
    under eager execution. That overhead is at least 15ms for
    common backbones, which caps inference at about 60 FPS.
    For more details, see the eager mechanism of Dragon.
"""
def __init__(self, inputs, outputs, constants=None):
def canonicalize(input_dict):
if input_dict is None:
return {}
for k, v in input_dict.items():
input_dict[k] = v.name if hasattr(v, 'name') else v
return input_dict
self.placeholders = {}
self._inputs = canonicalize(inputs)
self._outputs = canonicalize(outputs)
self._constants = canonicalize(constants)
self._workspace = get_workspace()
self._tracer = torch.jit.get_tracer()
@property
def workspace(self):
return self._workspace
@workspace.setter
def workspace(self, value):
self._workspace = value
def forward(self, **kwargs):
# Assign inputs
for name, tensor in self._inputs.items():
value = kwargs.get(name, None)
tensor_util.set_array(tensor, value)
# Replay the traced expressions
self._tracer.replay()
# Collect outputs
# 1) Target results
# 2) Constant values
outputs = collections.OrderedDict()
for name, tensor in self._outputs.items():
outputs[name] = tensor_util.to_array(tensor, True)
for name, value in self._constants.items():
outputs[name] = value
return outputs
def __call__(self, **kwargs):
with self._workspace.as_default():
return self.forward(**kwargs)
# Aliases
pickle = six.moves.pickle
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
"""Make record file for COCO dataset.""" """Make record file for COCO dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
import shutil import shutil
...@@ -37,8 +41,8 @@ if __name__ == '__main__': ...@@ -37,8 +41,8 @@ if __name__ == '__main__':
record_file=os.path.join(COCO_ROOT, 'coco_2014_trainval35k'), record_file=os.path.join(COCO_ROOT, 'coco_2014_trainval35k'),
images_path=[os.path.join(COCO_ROOT, 'images/train2014'), images_path=[os.path.join(COCO_ROOT, 'images/train2014'),
os.path.join(COCO_ROOT, 'images/val2014')], os.path.join(COCO_ROOT, 'images/val2014')],
splits_path=[os.path.join(COCO_ROOT, 'ImageSets'), splits_path=[os.path.join(COCO_ROOT, 'splits'),
os.path.join(COCO_ROOT, 'ImageSets')], os.path.join(COCO_ROOT, 'splits')],
mask_file='build/coco_2014_trainval35k_mask.pkl', mask_file='build/coco_2014_trainval35k_mask.pkl',
splits=['train', 'valminusminival'], splits=['train', 'valminusminival'],
) )
...@@ -48,7 +52,7 @@ if __name__ == '__main__': ...@@ -48,7 +52,7 @@ if __name__ == '__main__':
record_file=os.path.join(COCO_ROOT, 'coco_2014_minival'), record_file=os.path.join(COCO_ROOT, 'coco_2014_minival'),
images_path=os.path.join(COCO_ROOT, 'images/val2014'), images_path=os.path.join(COCO_ROOT, 'images/val2014'),
mask_file='build/coco_2014_minival_mask.pkl', mask_file='build/coco_2014_minival_mask.pkl',
splits_path=os.path.join(COCO_ROOT, 'ImageSets'), splits_path=os.path.join(COCO_ROOT, 'splits'),
splits=['minival'], splits=['minival'],
) )
......
...@@ -86,7 +86,7 @@ def make_record( ...@@ -86,7 +86,7 @@ def make_record(
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime())) print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
writer = dragon.io.SeetaRecordWriter( writer = dragon.io.KPLRecordWriter(
path=record_file, path=record_file,
protocol={ protocol={
'id': 'string', 'id': 'string',
...@@ -133,6 +133,6 @@ def make_record( ...@@ -133,6 +133,6 @@ def make_record(
writer.close() writer.close()
end_time = time.time() end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6 data_size = os.path.getsize(record_file + '/root.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.' print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time)) .format(total_line, data_size, end_time - start_time))
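For reference, the KPLRecord writing flow that `make_record` now builds on reduces to the following sketch; the path and fields below are abbreviated placeholders, not the full protocol used above:

```python
import dragon

# Abbreviated protocol: the real scripts also register the
# height/width/depth and per-object annotation fields shown above.
writer = dragon.io.KPLRecordWriter(
    path='/data/coco_2014_minival',        # placeholder output dir
    protocol={'id': 'string', 'content': 'bytes'},
)
writer.write({'id': '000001', 'content': b'<jpeg bytes>'})
writer.close()  # the payload lands in <path>/root.data
```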
...@@ -20,11 +20,11 @@ except: ...@@ -20,11 +20,11 @@ except:
import pickle as cPickle import pickle as cPickle
sys.path.insert(0, '../..') sys.path.insert(0, '../..')
from lib.pycocotools.coco import COCO from seetadet.pycocotools.coco import COCO
from lib.pycocotools import mask_utils from seetadet.pycocotools import mask_utils
class imdb(object): class COCOWrapper(object):
def __init__(self, image_set, year, data_dir): def __init__(self, image_set, year, data_dir):
self._year = year self._year = year
self._image_set = image_set self._image_set = image_set
...@@ -120,8 +120,6 @@ class imdb(object): ...@@ -120,8 +120,6 @@ class imdb(object):
# running out of the image bound # running out of the image bound
# Do not use them or decoding error is inevitable # Do not use them or decoding error is inevitable
mask_bytes = mask_utils.poly2bytes(obj['segmentation'], height, width) mask_bytes = mask_utils.poly2bytes(obj['segmentation'], height, width)
if not isinstance(mask_bytes, bytes):
print(type(mask_bytes))
if obj['area'] > 0 and x2 > x1 and y2 > y1: if obj['area'] > 0 and x2 > x1 and y2 > y1:
obj['clean_bbox'] = [x1, y1, x2, y2] obj['clean_bbox'] = [x1, y1, x2, y2]
valid_objects.append({ valid_objects.append({
...@@ -146,10 +144,11 @@ class imdb(object): ...@@ -146,10 +144,11 @@ class imdb(object):
def make_mask(split, year, data_dir): def make_mask(split, year, data_dir):
coco = imdb(split, year, data_dir) coco = COCOWrapper(split, year, data_dir)
print('Preparing to make split: {}, total {} images'.format(split, coco.num_images)) print('Preparing to make split: {}, total {} images'
if not osp.exists(osp.join(coco._data_path, 'ImageSets')): .format(split, coco.num_images))
os.makedirs(osp.join(coco._data_path, 'ImageSets')) if not osp.exists(osp.join(coco._data_path, 'splits')):
os.makedirs(osp.join(coco._data_path, 'splits'))
gt_recs = OrderedDict() gt_recs = OrderedDict()
for i in range(coco.num_images): for i in range(coco.num_images):
...@@ -157,14 +156,14 @@ def make_mask(split, year, data_dir): ...@@ -157,14 +156,14 @@ def make_mask(split, year, data_dir):
h, w, objects = coco.annotation_at(i) h, w, objects = coco.annotation_at(i)
gt_recs[filename] = objects gt_recs[filename] = objects
with open(osp.join('build', with open(osp.join('build', 'coco_' + year + '_' + split + '_mask.pkl'), 'wb') as f:
'coco_' + year + '_' + split + '_mask.pkl'), 'wb') as f: cPickle.dump(gt_recs, f, cPickle.HIGHEST_PROTOCOL)
cPickle.dump(gt_recs, f, cPickle.HIGHEST_PROTOCOL)
with open(osp.join(coco._data_path, 'ImageSets', split + '.txt'), 'w') as f: with open(osp.join(coco._data_path, 'splits', split + '.txt'), 'w') as f:
for i in range(coco.num_images): for i in range(coco.num_images):
filename = (coco.image_path_at(i).split('/')[-1]).split('.')[0] filename = (coco.image_path_at(i).split('/')[-1]).split('.')[0]
if i != coco.num_images - 1: filename += '\n' if i != coco.num_images - 1:
filename += '\n'
f.write(filename) f.write(filename)
......
...@@ -26,6 +26,6 @@ if __name__ == '__main__': ...@@ -26,6 +26,6 @@ if __name__ == '__main__':
record_file=osp.join(data_root, 'rotated_train'), record_file=osp.join(data_root, 'rotated_train'),
images_path=[osp.join(data_root, 'JPEGImages')], images_path=[osp.join(data_root, 'JPEGImages')],
annotations_path=[osp.join(data_root, 'Annotations')], annotations_path=[osp.join(data_root, 'Annotations')],
imagesets_path=[osp.join(data_root, 'ImageSets')], splits_path=[osp.join(data_root, 'ImageSets')],
splits=['train'] splits=['train']
) )
...@@ -57,7 +57,7 @@ def make_record( ...@@ -57,7 +57,7 @@ def make_record(
record_file, record_file,
images_path, images_path,
annotations_path, annotations_path,
imagesets_path, splits_path,
splits splits
): ):
if os.path.exists(record_file): if os.path.exists(record_file):
...@@ -68,15 +68,15 @@ def make_record( ...@@ -68,15 +68,15 @@ def make_record(
images_path = [images_path] images_path = [images_path]
if not isinstance(annotations_path, list): if not isinstance(annotations_path, list):
annotations_path = [annotations_path] annotations_path = [annotations_path]
if not isinstance(imagesets_path, list): if not isinstance(splits_path, list):
imagesets_path = [imagesets_path] splits_path = [splits_path]
assert len(splits) == len(imagesets_path) assert len(splits) == len(splits_path)
assert len(splits) == len(images_path) assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path) assert len(splits) == len(annotations_path)
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime())) print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
writer = dragon.io.SeetaRecordWriter( writer = dragon.io.KPLRecordWriter(
path=record_file, path=record_file,
protocol={ protocol={
'id': 'string', 'id': 'string',
...@@ -99,31 +99,37 @@ def make_record( ...@@ -99,31 +99,37 @@ def make_record(
} }
) )
count, total_line = 0, 0 # Scan all available entries
start_time = time.time() print('Scan entries...')
entries = []
for db_idx, split in enumerate(splits): for i, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt') split_file = os.path.join(splits_path[i], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f: with open(split_file, 'r') as f:
lines = f.readlines() lines = f.readlines()
total_line += len(lines)
for line in lines: for line in lines:
count += 1
if count % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
count, total_line, now_time - start_time))
filename = line.strip() filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg') img_file = os.path.join(images_path[i], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml') ann_file = os.path.join(annotations_path[i], filename + '.xml')
writer.write(make_example(image_file, xml_file)) entries.append((img_file, ann_file))
# Parse and write into record file
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
start_time = time.time()
for i, (img_file, ann_file) in enumerate(entries):
if i > 0 and i % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
i, len(entries), now_time - start_time))
writer.write(make_example(img_file, ann_file))
now_time = time.time() now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time)) print('{} / {} in {:.2f} sec'.format(
len(entries), len(entries), now_time - start_time))
writer.close() writer.close()
end_time = time.time() end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6 data_size = os.path.getsize(record_file + '/root.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.' print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time)) .format(len(entries), data_size, end_time - start_time))
...@@ -28,7 +28,7 @@ if __name__ == '__main__': ...@@ -28,7 +28,7 @@ if __name__ == '__main__':
osp.join(voc_root, 'VOCdevkit2012/VOC2012/JPEGImages')], osp.join(voc_root, 'VOCdevkit2012/VOC2012/JPEGImages')],
annotations_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'), annotations_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/Annotations')], osp.join(voc_root, 'VOCdevkit2012/VOC2012/Annotations')],
imagesets_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'), splits_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/ImageSets/Main')], osp.join(voc_root, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
splits=['trainval', 'trainval'] splits=['trainval', 'trainval']
) )
...@@ -37,6 +37,6 @@ if __name__ == '__main__': ...@@ -37,6 +37,6 @@ if __name__ == '__main__':
record_file=osp.join(voc_root, 'voc_2007_test'), record_file=osp.join(voc_root, 'voc_2007_test'),
images_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'), images_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
annotations_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'), annotations_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
imagesets_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'), splits_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits=['test'] splits=['test']
) )
...@@ -26,11 +26,17 @@ def make_example(image_file, xml_file): ...@@ -26,11 +26,17 @@ def make_example(image_file, xml_file):
tree = ET.parse(xml_file) tree = ET.parse(xml_file)
filename = os.path.split(xml_file)[-1] filename = os.path.split(xml_file)[-1]
objs = tree.findall('object') objs = tree.findall('object')
size = tree.find('size')
example = {'id': filename.split('.')[0], 'object': []} example = {'id': filename.split('.')[0], 'object': []}
with open(image_file, 'rb') as f: with open(image_file, 'rb') as f:
img_bytes = bytes(f.read()) img_bytes = bytes(f.read())
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 1) if size is not None:
example['height'], example['width'], example['depth'] = img.shape example['height'] = int(size.find('height').text)
example['width'] = int(size.find('width').text)
example['depth'] = int(size.find('depth').text)
else:
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 3)
example['height'], example['width'], example['depth'] = img.shape
example['content'] = img_bytes example['content'] = img_bytes
for ix, obj in enumerate(objs): for ix, obj in enumerate(objs):
bbox = obj.find('bndbox') bbox = obj.find('bndbox')
...@@ -53,7 +59,7 @@ def make_record( ...@@ -53,7 +59,7 @@ def make_record(
record_file, record_file,
images_path, images_path,
annotations_path, annotations_path,
imagesets_path, splits_path,
splits splits
): ):
if os.path.exists(record_file): if os.path.exists(record_file):
...@@ -64,15 +70,13 @@ def make_record( ...@@ -64,15 +70,13 @@ def make_record(
images_path = [images_path] images_path = [images_path]
if not isinstance(annotations_path, list): if not isinstance(annotations_path, list):
annotations_path = [annotations_path] annotations_path = [annotations_path]
if not isinstance(imagesets_path, list): if not isinstance(splits_path, list):
imagesets_path = [imagesets_path] splits_path = [splits_path]
assert len(splits) == len(imagesets_path) assert len(splits) == len(splits_path)
assert len(splits) == len(images_path) assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path) assert len(splits) == len(annotations_path)
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime())) writer = dragon.io.KPLRecordWriter(
writer = dragon.io.SeetaRecordWriter(
path=record_file, path=record_file,
protocol={ protocol={
'id': 'string', 'id': 'string',
...@@ -91,31 +95,36 @@ def make_record( ...@@ -91,31 +95,36 @@ def make_record(
} }
) )
count, total_line = 0, 0 # Scan all available entries
start_time = time.time() print('Scan entries...')
entries = []
for db_idx, split in enumerate(splits): for i, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt') split_file = os.path.join(splits_path[i], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f: with open(split_file, 'r') as f:
lines = f.readlines() lines = f.readlines()
total_line += len(lines)
for line in lines: for line in lines:
count += 1
if count % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
count, total_line, now_time - start_time))
filename = line.strip() filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg') img_file = os.path.join(images_path[i], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml') ann_file = os.path.join(annotations_path[i], filename + '.xml')
writer.write(make_example(image_file, xml_file)) entries.append((img_file, ann_file))
# Parse and write into record file
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
start_time = time.time()
for i, (img_file, ann_file) in enumerate(entries):
if i > 0 and i % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
i, len(entries), now_time - start_time))
writer.write(make_example(img_file, ann_file))
now_time = time.time() now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time)) print('{} / {} in {:.2f} sec'.format(
len(entries), len(entries), now_time - start_time))
writer.close() writer.close()
end_time = time.time() end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6 data_size = os.path.getsize(record_file + '/root.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.' print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time)) .format(len(entries), data_size, end_time - start_time))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from seetadet.algo.faster_rcnn.anchor_target import AnchorTarget
from seetadet.algo.faster_rcnn.data_loader import DataLoader
from seetadet.algo.faster_rcnn.proposal import Proposal
from seetadet.algo.faster_rcnn.proposal_target import ProposalTarget
from seetadet.algo.faster_rcnn.utils import generate_grid_anchors
from seetadet.algo.faster_rcnn.utils import map_blobs_by_levels
from seetadet.algo.faster_rcnn.utils import map_rois_to_levels
from seetadet.algo.faster_rcnn.utils import map_returns_to_blobs
...@@ -16,11 +16,11 @@ from __future__ import print_function ...@@ -16,11 +16,11 @@ from __future__ import print_function
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
from lib.core.config import cfg from seetadet.algo.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.generate_anchors import generate_anchors from seetadet.algo.faster_rcnn.utils import generate_grid_anchors
from lib.faster_rcnn.utils import generate_grid_anchors from seetadet.core.config import cfg
from lib.utils import boxes as box_util from seetadet.utils import boxes as box_util
from lib.utils.framework import new_tensor from seetadet.utils.env import new_tensor
class AnchorTarget(object): class AnchorTarget(object):
...@@ -62,9 +62,7 @@ class AnchorTarget(object): ...@@ -62,9 +62,7 @@ class AnchorTarget(object):
# Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care # Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
labels_wide = -np.ones((num_images, num_anchors,), 'float32') labels_wide = -np.ones((num_images, num_anchors,), 'float32')
bbox_targets_wide = np.zeros((num_images, num_anchors, 4), 'float32') bbox_indices_wide, bbox_anchors_wide, bbox_targets_wide = [], [], []
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
for ix in range(num_images): for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, ...) # GT boxes (x1, y1, x2, y2, label, ...)
...@@ -95,13 +93,13 @@ class AnchorTarget(object): ...@@ -95,13 +93,13 @@ class AnchorTarget(object):
np.arange(overlaps.shape[1])] np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap # Foreground: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1 labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU # Foreground: above threshold IoU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU # Background: below threshold IoU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many # Subsample positive labels if we have too many
...@@ -112,6 +110,11 @@ class AnchorTarget(object): ...@@ -112,6 +110,11 @@ class AnchorTarget(object):
labels[disable_inds] = -1 labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
# Re-enable the highest-overlap anchors if subsampling left no foreground
if len(fg_inds) == 0:
labels[gt_argmax_overlaps] = 1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many # Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0] bg_inds = np.where(labels == 0)[0]
...@@ -119,51 +122,27 @@ class AnchorTarget(object): ...@@ -119,51 +122,27 @@ class AnchorTarget(object):
disable_inds = npr.choice(bg_inds, len(bg_inds) - num_bg, False) disable_inds = npr.choice(bg_inds, len(bg_inds) - num_bg, False)
labels[disable_inds] = -1 labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), 'float32') labels_wide[ix, inds_inside] = labels
bbox_targets[fg_inds, :] = \ bbox_anchors_wide.append(anchors[fg_inds])
bbox_indices_wide.append(inds_inside[fg_inds] + (num_anchors * ix))
bbox_targets_wide.append(
box_util.bbox_transform( box_util.bbox_transform(
anchors[fg_inds, :], anchors[fg_inds],
gt_boxes[argmax_overlaps[fg_inds], :4], gt_boxes[argmax_overlaps[fg_inds], :4],
) )
bbox_inside_weights = np.zeros((num_inside, 4), 'float32') )
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), 'float32') if self.num_strides == 1:
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
if self.num_strides > 1:
labels = labels_wide.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
else:
A = self.base_anchors[0].shape[0] A = self.base_anchors[0].shape[0]
height, width = features[0].shape[-2:] height, width = features[0].shape[-2:]
labels = labels_wide \ labels_wide = labels_wide \
.reshape((num_images, height, width, A)) \ .reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \ .transpose(0, 3, 1, 2) \
.reshape((num_images, num_anchors)) .reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = bbox_inside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = bbox_outside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return { return {
'labels': new_tensor(labels), 'labels': new_tensor(labels_wide),
'bbox_targets': new_tensor(bbox_targets), 'bbox_indices': new_tensor(np.concatenate(bbox_indices_wide)),
'bbox_inside_weights': new_tensor(bbox_inside_weights), 'bbox_targets': new_tensor(np.concatenate(bbox_targets_wide).astype('float32')),
'bbox_outside_weights': new_tensor(bbox_outside_weights), 'bbox_anchors': new_tensor(np.concatenate(bbox_anchors_wide).astype('float32')),
} }
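A small worked example of the sparse indexing adopted above: instead of dense per-anchor regression maps, only foreground positions are kept, addressed into the flattened (num_images, num_anchors) label map. The numbers are illustrative:

```python
import numpy as np

num_images, num_anchors = 2, 5
inds_inside = np.arange(num_anchors)    # all anchors inside the image
fg_inds = np.array([2, 4])              # foreground anchors of image 1
ix = 1                                  # image index

# Matches: inds_inside[fg_inds] + (num_anchors * ix)
bbox_indices = inds_inside[fg_inds] + num_anchors * ix
print(bbox_indices)  # [7 9] -> rows 7 and 9 of the flattened labels
```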
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from seetadet.algo.faster_rcnn import data_transformer
from seetadet.core.config import cfg
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger
from seetadet.utils.blob import im_list_to_blob
class DataLoader(object):
"""Load mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
dataset = get_dataset(cfg.TRAIN.DATASET)
if cfg.USE_DALI:
from seetadet.dali import rcnn_pipeline as pipe
self.iterator = pipe.new_iterator(dataset.source)
else:
self.iterator = Iterator(**{
'dataset': dataset.cls,
'source': dataset.source,
'classes': dataset.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_THREADS - 1,
})
def __call__(self):
outputs = self.iterator.next()
if isinstance(outputs['data'], np.ndarray):
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class Iterator(mp.Process):
"""Iterator to return the batch of data."""
def __init__(self, **kwargs):
super(Iterator, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and \
kwargs.get('phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.q_in = mp.Queue(num_batches * self._batch_size)
self.q1_out = mp.Queue(num_batches * self._batch_size)
self.q2_out = mp.Queue(num_batches * self._batch_size)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
part_idx=part_idx, num_parts=num_parts, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.q_in
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
p = data_transformer.DataTransformer(**kwargs)
p._seed += (i + rank * self._num_transformers)
p.q_in = self.q_in
p.q1_out, p.q2_out = self.q1_out, self.q2_out
p.start()
self._transformers.append(p)
time.sleep(0.1)
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for p in processes:
p.terminate()
p.join()
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def next(self):
"""Return the next batch of data."""
return self.__next__()
def __iter__(self):
"""Return the iterator self."""
return self
def __next__(self):
"""Return the next batch of data."""
q_out = None
# Two queues implement aspect grouping, which
# reduces GPU memory by avoiding the padding of
# a huge, nearly square batch blob
while q_out is None:
if self.q1_out.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
q_out = self.q1_out
elif self.q2_out.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
q_out = self.q2_out
self.q1_out, self.q2_out = self.q2_out, self.q1_out
images, images_info, boxes_to_pack = [], [], []
for i in range(cfg.TRAIN.IMS_PER_BATCH):
image, image_scale, boxes = q_out.get()
images.append(image)
images_info.append(list(image.shape[:2]) + [image_scale])
gt_boxes = np.zeros((boxes.shape[0], boxes.shape[1] + 1), 'float32')
gt_boxes[:, :boxes.shape[1]], gt_boxes[:, -1] = boxes, i
boxes_to_pack.append(gt_boxes)
return {
'data': im_list_to_blob(images),
'ims_info': np.array(images_info, dtype=np.float32),
'gt_boxes': np.concatenate(boxes_to_pack),
}
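To make the two-queue aspect grouping concrete: batching mixed orientations forces `im_list_to_blob` to pad every image to the per-axis maxima, while grouping keeps the pad tight. A rough sketch with illustrative shapes:

```python
import numpy as np

portrait = np.zeros((800, 600, 3), 'uint8')
landscape = np.zeros((600, 800, 3), 'uint8')

def blob_elements(images):
    # Pad every image to the max height and width across the batch.
    h = max(im.shape[0] for im in images)
    w = max(im.shape[1] for im in images)
    return len(images) * h * w * 3

print(blob_elements([portrait, landscape]))  # 3840000 (800x800 pad)
print(blob_elements([portrait, portrait]))   # 2880000 (800x600, no waste)
```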
...@@ -15,19 +15,19 @@ from __future__ import print_function ...@@ -15,19 +15,19 @@ from __future__ import print_function
import multiprocessing import multiprocessing
import cv2
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.datasets.example import Example from seetadet.datasets.example import Example
from lib.utils import boxes as box_util from seetadet.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob from seetadet.utils.blob import prep_im_for_blob
from lib.utils.image import get_image_with_target_size
class DataTransformer(multiprocessing.Process): class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(DataTransformer, self).__init__() super(DataTransformer, self).__init__()
self._scales = cfg.TRAIN.SCALES
self._max_size = cfg.TRAIN.MAX_SIZE
self._seed = cfg.RNG_SEED self._seed = cfg.RNG_SEED
self._use_flipped = cfg.TRAIN.USE_FLIPPED self._use_flipped = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF self._use_diff = cfg.TRAIN.USE_DIFF
...@@ -37,13 +37,7 @@ class DataTransformer(multiprocessing.Process): ...@@ -37,13 +37,7 @@ class DataTransformer(multiprocessing.Process):
self.q_in = self.q1_out = self.q2_out = None self.q_in = self.q1_out = self.q2_out = None
self.daemon = True self.daemon = True
def make_roi_dict( def make_roi_dict(self, example, im_scale, apply_flip=False):
self,
example,
im_scale,
apply_flip=False,
offsets=None,
):
objects, n_objects = example.objects, 0 objects, n_objects = example.objects, 0
height, width = example.height, example.width height, width = example.height, example.width
if not self._use_diff: if not self._use_diff:
...@@ -86,15 +80,6 @@ class DataTransformer(multiprocessing.Process): ...@@ -86,15 +80,6 @@ class DataTransformer(multiprocessing.Process):
# Scale the boxes to the detecting scale # Scale the boxes to the detecting scale
roi_dict['boxes'] *= im_scale roi_dict['boxes'] *= im_scale
# Apply the offsets from scale jitter
if offsets is not None:
roi_dict['boxes'][:, 0::2] += offsets[0]
roi_dict['boxes'][:, 1::2] += offsets[1]
roi_dict['boxes'][:, :] = np.minimum(
np.maximum(roi_dict['boxes'][:, :], 0),
[offsets[2][1] - 1, offsets[2][0] - 1] * 2,
)
return roi_dict return roi_dict
def get(self, example): def get(self, example):
...@@ -102,9 +87,8 @@ class DataTransformer(multiprocessing.Process): ...@@ -102,9 +87,8 @@ class DataTransformer(multiprocessing.Process):
img = example.image img = example.image
# Scale # Scale
max_size = cfg.TRAIN.MAX_SIZE target_size = self._scales[np.random.randint(len(self._scales))]
target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))] img, im_scale = prep_im_for_blob(img, target_size, self._max_size)
img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
# Flip # Flip
apply_flip = False apply_flip = False
...@@ -113,19 +97,8 @@ class DataTransformer(multiprocessing.Process): ...@@ -113,19 +97,8 @@ class DataTransformer(multiprocessing.Process):
img = img[:, ::-1] img = img[:, ::-1]
apply_flip = True apply_flip = True
# Random Crop or RandomPad
offsets = None
if cfg.TRAIN.MAX_SIZE > 0:
if jitter != 1:
# To a rectangle (scale, max_size)
target_size = (np.array(img.shape[:2]) / jitter).astype(np.int32)
img, offsets = get_image_with_target_size(target_size, img)
else:
# To a square (target_size, target_size)
img, offsets = get_image_with_target_size([target_size] * 2, img)
# Example -> RoIDict # Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets) roi_dict = self.make_roi_dict(example, im_scale, apply_flip)
# Post-Process for gt boxes # Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}] # Shape like: [num_objects, {x1, y1, x2, y2, cls}]
......
...@@ -17,11 +17,11 @@ import collections ...@@ -17,11 +17,11 @@ import collections
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.algo.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.generate_anchors import generate_anchors from seetadet.algo.faster_rcnn.utils import generate_grid_anchors
from lib.faster_rcnn.utils import generate_grid_anchors from seetadet.core.config import cfg
from lib.nms import nms_wrapper from seetadet.utils import boxes as box_util
from lib.utils import boxes as box_util from seetadet.utils import nms
class Proposal(object): class Proposal(object):
...@@ -67,8 +67,8 @@ class Proposal(object): ...@@ -67,8 +67,8 @@ class Proposal(object):
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
cls_prob = cls_prob.numpy(True) cls_prob = cls_prob.numpy()
bbox_pred = bbox_pred.numpy(True) bbox_pred = bbox_pred.numpy()
if self.num_strides > 1: if self.num_strides > 1:
# (?, 4, A * K) -> (?, A * K, 4) # (?, 4, A * K) -> (?, A * K, 4)
bbox_pred = bbox_pred.transpose((0, 2, 1)) bbox_pred = bbox_pred.transpose((0, 2, 1))
...@@ -113,7 +113,7 @@ class Proposal(object): ...@@ -113,7 +113,7 @@ class Proposal(object):
# Apply nms (e.g. threshold = 0.7) # Apply nms (e.g. threshold = 0.7)
# Take after_nms_topN (e.g. 300) # Take after_nms_topN (e.g. 300)
# Return the top proposals (-> RoIs top) # Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh) keep = nms.gpu_nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0: if post_nms_top_n > 0:
keep = keep[:post_nms_top_n] keep = keep[:post_nms_top_n]
proposals = proposals[keep, :] proposals = proposals[keep, :]
......
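For reference, the detections handed to `nms.gpu_nms` above pack box coordinates and scores into a single array; a short sketch (the actual call needs the compiled extension, so it is left commented):

```python
import numpy as np

proposals = np.array([[0, 0, 10, 10],
                      [1, 1, 11, 11]], 'float32')
scores = np.array([[0.9], [0.8]], 'float32')
dets = np.hstack((proposals, scores))  # rows of [x1, y1, x2, y2, score]
# keep = nms.gpu_nms(dets, 0.7)        # indices of surviving rows
```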
...@@ -18,12 +18,10 @@ import collections ...@@ -18,12 +18,10 @@ import collections
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
from lib.core.config import cfg from seetadet.algo.faster_rcnn import utils as rcnn_util
from lib.faster_rcnn.utils import map_blobs_to_outputs from seetadet.core.config import cfg
from lib.faster_rcnn.utils import map_returns_to_blobs from seetadet.utils import boxes as box_util
from lib.faster_rcnn.utils import map_rois_to_levels from seetadet.utils.env import new_tensor
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
class ProposalTarget(object): class ProposalTarget(object):
...@@ -35,10 +33,8 @@ class ProposalTarget(object): ...@@ -35,10 +33,8 @@ class ProposalTarget(object):
self.num_classes = cfg.MODEL.NUM_CLASSES self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([ self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')), ('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')), ('labels', np.array([-1], 'int64')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')), ('bbox_targets', np.zeros((1, 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
]) ])
def __call__(self, rpn_rois, gt_boxes): def __call__(self, rpn_rois, gt_boxes):
...@@ -63,86 +59,65 @@ class ProposalTarget(object): ...@@ -63,86 +59,65 @@ class ProposalTarget(object):
# Sample a batch of RoIs for training # Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs( rcnn_util.map_returns_to_blobs(
sample_rois( sample_rois(
rois, rois,
gt_boxes, gt_boxes,
rois_per_image, rois_per_image,
fg_rois_per_image, fg_rois_per_image,
self.num_classes,
), blobs, keys, ), blobs, keys,
) )
# Stack into continuous blobs # Stack into continuous blobs
for k, v in blobs.items(): blobs = dict((k, np.concatenate(blobs[k])) for k in blobs.keys())
blobs[k] = np.concatenate(blobs[k], 0)
if self.num_strides > 1: if self.num_strides > 1:
# Distribute RoIs into pyramids # Distribute RoIs into pyramids
min_lvl = cfg.FPN.ROI_MIN_LEVEL min_lvl = cfg.FPN.ROI_MIN_LEVEL
max_lvl = cfg.FPN.ROI_MAX_LEVEL max_lvl = cfg.FPN.ROI_MAX_LEVEL
k = max_lvl - min_lvl + 1 num_levels = max_lvl - min_lvl + 1
levels = map_rois_to_levels(blobs['rois'], min_lvl, max_lvl) levels = rcnn_util.map_rois_to_levels(blobs['rois'], min_lvl, max_lvl)
outputs = map_blobs_to_outputs( lvl_blobs = rcnn_util.map_blobs_by_levels(
blobs, blobs,
self.defaults, self.defaults,
[np.where(levels == (i + min_lvl))[0] for i in range(k)], [np.where(levels == (i + min_lvl))[0] for i in range(num_levels)],
) )
return { blobs = dict((k, np.concatenate(lvl_blobs[k])) for k in blobs.keys())
'rois': [new_tensor(outputs['rois'][i]) for i in range(k)], rois_wide = [lvl_blobs['rois'][i] for i in range(num_levels)]
'labels': new_tensor(np.concatenate(outputs['labels'], 0)),
'bbox_targets': new_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': new_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': new_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
else: else:
# Return RoIs directly for CX-stride # Return RoIs directly for the single-stride case
return { rois_wide = [blobs['rois']]
'rois': [new_tensor(blobs['rois'])],
'labels': new_tensor(blobs['labels']), # Select the foreground RoIs only for bbox branch
'bbox_targets': new_tensor(blobs['bbox_targets']), fg_inds = np.where(blobs['labels'] > 0)[0]
'bbox_inside_weights': new_tensor(blobs['bbox_inside_weights']), cls_inds = np.arange(len(blobs['rois'])) * self.num_classes
'bbox_outside_weights': new_tensor(blobs['bbox_outside_weights']),
} return {
'rois': [new_tensor(rois) for rois in rois_wide],
'labels': new_tensor(blobs['labels']),
def get_targets(ex_rois, gt_rois, gt_labels, num_classes): 'bbox_indices': new_tensor(cls_inds[fg_inds] + blobs['labels'][fg_inds]),
"""Compute bounding-box regression targets for an image.""" 'bbox_targets': new_tensor(blobs['bbox_targets'][fg_inds].astype('float32')),
assert ex_rois.shape[0] == gt_rois.shape[0] 'bbox_anchors': new_tensor(blobs['rois'][fg_inds, 1:].astype('float32')),
assert ex_rois.shape[1] == 4 }
assert gt_rois.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0] def sample_rois(all_rois, gt_boxes, num_rois, num_fg_rois):
targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = np.zeros((ex_rois.shape[0], 4 * num_classes), 'float32')
inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
for i in fg_inds:
start = int(4 * gt_labels[i])
bbox_targets[i, start:start + 4] = targets[i]
inside_weights[i, start:start + 4] = (1., 1., 1., 1.)
outside_weights = np.array(inside_weights > 0).astype('float32')
return bbox_targets, inside_weights, outside_weights
def sample_rois(
all_rois,
gt_boxes,
num_rois,
num_fg_rois,
num_classes,
):
"""Sample a batch of RoIs comprising foreground and background examples.""" """Sample a batch of RoIs comprising foreground and background examples."""
overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4]) overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1) gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1) max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4] labels = gt_boxes[gt_assignment, 4].astype('int64')
# Select foreground RoIs as those with >= FG_THRESH overlap # Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] fg_thresh = cfg.TRAIN.FG_THRESH
fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size)) fg_inds = np.where(max_overlaps >= fg_thresh)[0]
while fg_inds.size == 0:
fg_thresh -= 0.01
fg_inds = np.where(max_overlaps >= fg_thresh)[0]
# Sample foreground regions without replacement # Sample foreground regions without replacement
if fg_inds.size > 0: fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size))
fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False) fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
...@@ -160,15 +135,14 @@ def sample_rois( ...@@ -160,15 +135,14 @@ def sample_rois(
rois, labels = all_rois[keep_inds], labels[keep_inds] rois, labels = all_rois[keep_inds], labels[keep_inds]
# Clamp labels for the background RoIs to 0 # Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0 labels[fg_rois_per_this_image:] = 0
# Clamp the image indices for the background RoIs to -1
rois[fg_rois_per_this_image:][0] = -1
# Compute the target from RoIs # Compute the target from RoIs
outputs = [rois, labels] return [
outputs += get_targets( rois,
rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4],
labels, labels,
num_classes, box_util.bbox_transform(
) rois[:, 1:5],
return outputs gt_boxes[gt_assignment[keep_inds], :4],
cfg.BBOX_REG_WEIGHTS,
)
]
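The `bbox_indices` returned above address the class-specific slot of each foreground RoI in a flattened (num_rois, num_classes) prediction. A tiny worked example with illustrative numbers:

```python
import numpy as np

num_classes = 3
labels = np.array([2, 0, 1])           # per-RoI labels, 0 = background
fg_inds = np.where(labels > 0)[0]      # RoIs 0 and 2

cls_inds = np.arange(len(labels)) * num_classes
bbox_indices = cls_inds[fg_inds] + labels[fg_inds]
print(bbox_indices)  # [2 7]: class-2 slot of RoI 0, class-1 slot of RoI 2
```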
...@@ -13,17 +13,18 @@ from __future__ import absolute_import ...@@ -13,17 +13,18 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import types
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.modeling.detector import new_detector from seetadet.modeling.detector import new_detector
from lib.nms import nms_wrapper from seetadet.utils import boxes as box_util
from lib.utils import boxes as box_util from seetadet.utils import nms as nms_util
from lib.utils import framework from seetadet.utils import time_util
from lib.utils import time_util from seetadet.utils.blob import im_list_to_blob
from lib.utils.blob import im_list_to_blob from seetadet.utils.image import scale_image
from lib.utils.image import scale_image
def im_detect(detector, raw_image): def im_detect(detector, raw_image):
...@@ -31,49 +32,41 @@ def im_detect(detector, raw_image): ...@@ -31,49 +32,41 @@ def im_detect(detector, raw_image):
ims, ims_scale = scale_image(raw_image) ims, ims_scale = scale_image(raw_image)
# Prepare blobs # Prepare blobs
blobs = {'data': im_list_to_blob(ims)} data = im_list_to_blob(ims)
blobs['ims_info'] = np.array([ ims_info = np.array([list(data.shape[1:3]) + [im_scale]
list(blobs['data'].shape[1:3]) + [im_scale] for im_scale in ims_scale], dtype=np.float32)
for im_scale in ims_scale
], dtype=np.float32)
# Do Forward # Do Forward
if not hasattr(detector, 'graph'): data = torch.from_numpy(data)
with framework.new_workspace().as_default(): ims_info = torch.from_numpy(ims_info)
data = torch.from_numpy(blobs['data'])
ims_info = torch.from_numpy(blobs['ims_info']) if not hasattr(detector, 'script_forward'):
with torch.no_grad(): def script_forward(self, data, ims_info):
with torch.jit.Tracer(retain_ops=True): return self.forward({'data': data, 'ims_info': ims_info})
inputs = {'data': data, 'ims_info': ims_info} detector.script_forward = torch.jit.trace(
outputs = detector.forward(inputs) func=types.MethodType(script_forward, detector),
detector.graph = \ example_inputs=[data, ims_info],
framework.Graph(inputs, { )
'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'], outputs = detector.script_forward(data, ims_info)
'bbox_pred': outputs['bbox_pred'] outputs = dict((k, outputs[k].numpy()) for k in outputs.keys())
})
outputs = detector.graph(**blobs)
# Decode results # Decode results
rois = outputs['rois'] all_scores, all_boxes = [], []
scores, boxes, batch_inds = [], [], []
pred_boxes = \ pred_boxes = \
box_util.bbox_transform_inv( box_util.bbox_transform_inv(
rois[:, 1:5], outputs['rois'][:, 1:5],
outputs['bbox_pred'], outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS, cfg.BBOX_REG_WEIGHTS,
) )
for i in range(len(ims)): for i in range(len(ims)):
inds = np.where(rois[:, 0].astype(np.int32) == i)[0] inds = np.where(outputs['rois'][:, 0].astype(np.int32) == i)[0]
im_boxes = pred_boxes[inds] / ims_scale[i] boxes = pred_boxes[inds] / ims_scale[i]
scores.append(outputs['cls_prob'][inds]) all_scores.append(outputs['cls_prob'][inds])
boxes.append(box_util.clip_tiled_boxes(im_boxes, raw_image.shape)) all_boxes.append(box_util.clip_tiled_boxes(boxes, raw_image.shape))
return ( return np.vstack(all_scores), np.vstack(all_boxes)
np.vstack(scores) if len(ims) > 0 else scores[0],
np.vstack(boxes) if len(ims) > 0 else boxes[0],
)
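The lazy tracing above boils down to a cache-on-first-call pattern: bind a plain function to the detector as a method, trace it once with example inputs, and reuse the traced callable afterwards. A stripped-down sketch (the `torch.jit.trace` call is shown as used above; the toy `Detector` is an assumption):

```python
import types

class Detector(object):
    def forward(self, inputs):
        return {'out': inputs['data']}

def script_forward(self, data):
    # Stands in for the detector's dict-based forward above.
    return self.forward({'data': data})

detector = Detector()
if not hasattr(detector, 'script_forward'):
    # With dragon.vm.torch this becomes:
    #   detector.script_forward = torch.jit.trace(
    #       func=types.MethodType(script_forward, detector),
    #       example_inputs=[data])
    detector.script_forward = types.MethodType(script_forward, detector)

print(detector.script_forward('x'))  # {'out': 'x'}
```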
def test_net(weights, num_classes, q_in, q_out, device): def test_net(weights, num_classes, q_in, q_out, device):
...@@ -84,7 +77,7 @@ def test_net(weights, num_classes, q_in, q_out, device): ...@@ -84,7 +77,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
while True: while True:
idx, raw_image = q_in.get() idx, raw_image = q_in.get()
if raw_image is None: if idx < 0:
break break
boxes_this_image = [[]] boxes_this_image = [[]]
...@@ -101,17 +94,16 @@ def test_net(weights, num_classes, q_in, q_out, device): ...@@ -101,17 +94,16 @@ def test_net(weights, num_classes, q_in, q_out, device):
(cls_boxes, cls_scores[:, np.newaxis]) (cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False) ).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS: if cfg.TEST.USE_SOFT_NMS:
keep = nms_wrapper.soft_nms( keep = nms_util.soft_nms(
cls_detections, cls_detections,
thresh=cfg.TEST.NMS, thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD, method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA, sigma=cfg.TEST.SOFT_NMS_SIGMA,
) )
else: else:
keep = nms_wrapper.nms( keep = nms_util.nms(
cls_detections, cls_detections,
thresh=cfg.TEST.NMS, thresh=cfg.TEST.NMS,
force_cpu=True,
) )
cls_detections = cls_detections[keep, :] cls_detections = cls_detections[keep, :]
boxes_this_image.append(cls_detections) boxes_this_image.append(cls_detections)
...@@ -119,11 +111,8 @@ def test_net(weights, num_classes, q_in, q_out, device): ...@@ -119,11 +111,8 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put(( q_out.put((
idx, idx,
{ dict([('im_detect', _t['im_detect'].average_time),
'im_detect': _t['im_detect'].average_time, ('misc', _t['misc'].average_time)]),
'misc': _t['misc'].average_time, dict([('boxes', boxes_this_image)]),
},
{
'boxes': boxes_this_image,
},
)) ))
...@@ -16,7 +16,7 @@ from __future__ import print_function ...@@ -16,7 +16,7 @@ from __future__ import print_function
import collections import collections
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.core.config import cfg
def generate_grid_anchors(features, base_anchors, strides): def generate_grid_anchors(features, base_anchors, strides):
...@@ -75,7 +75,7 @@ def map_rois_to_levels(rois, k_min, k_max): ...@@ -75,7 +75,7 @@ def map_rois_to_levels(rois, k_min, k_max):
return np.clip(target_levels, k_min, k_max) return np.clip(target_levels, k_min, k_max)
def map_blobs_to_outputs(blobs, defaults, lvl_inds): def map_blobs_by_levels(blobs, defaults, lvl_inds):
"""Map blobs to outputs according to fpn indices.""" """Map blobs to outputs according to fpn indices."""
outputs = collections.defaultdict(list) outputs = collections.defaultdict(list)
for inds in lvl_inds: for inds in lvl_inds:
......
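The level mapping clipped above typically follows the FPN paper's heuristic, k = floor(k0 + log2(sqrt(area) / 224)); a sketch under that assumption (k0 and the canonical size are the paper's defaults, not read from this diff):

```python
import numpy as np

def map_rois_to_levels(rois, k_min, k_max, k0=4, canonical=224.):
    # rois: rows of [batch_idx, x1, y1, x2, y2]
    ws = rois[:, 3] - rois[:, 1] + 1
    hs = rois[:, 4] - rois[:, 2] + 1
    target_levels = np.floor(k0 + np.log2(np.sqrt(ws * hs) / canonical))
    return np.clip(target_levels, k_min, k_max)

rois = np.array([[0, 0, 0, 111, 111],    # 112x112 -> level 3
                 [0, 0, 0, 447, 447]],   # 448x448 -> level 5
                'float32')
print(map_rois_to_levels(rois, 2, 5))    # [3. 5.]
```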
...@@ -13,10 +13,7 @@ from __future__ import absolute_import ...@@ -13,10 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
# Import custom modules from seetadet.algo.faster_rcnn.anchor_target import AnchorTarget
from lib.modeling.fast_rcnn import FastRCNN from seetadet.algo.faster_rcnn.proposal import Proposal
from lib.modeling.fpn import FPN from seetadet.algo.mask_rcnn.data_loader import DataLoader
from lib.modeling.mask_rcnn import MaskRCNN from seetadet.algo.mask_rcnn.proposal_target import ProposalTarget
from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from seetadet.algo.mask_rcnn import data_transformer
from seetadet.core.config import cfg
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger
from seetadet.utils.blob import im_list_to_blob
from seetadet.utils.blob import mask_list_to_blob
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
dataset = get_dataset(cfg.TRAIN.DATASET)
self.iterator = Iterator(**{
'dataset': dataset.cls,
'source': dataset.source,
'classes': dataset.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_THREADS - 1,
})
def __call__(self):
outputs = self.iterator.next()
if isinstance(outputs['data'], np.ndarray):
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class Iterator(mp.Process):
"""Iterator to return the batch of data."""
def __init__(self, **kwargs):
super(Iterator, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and \
kwargs.get('phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.q_in = mp.Queue(num_batches * self._batch_size)
self.q1_out = mp.Queue(num_batches * self._batch_size)
self.q2_out = mp.Queue(num_batches * self._batch_size)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
part_idx=part_idx, num_parts=num_parts, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.q_in
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
p = data_transformer.DataTransformer(**kwargs)
p._seed += (i + rank * self._num_transformers)
p.q_in = self.q_in
p.q1_out, p.q2_out = self.q1_out, self.q2_out
p.start()
self._transformers.append(p)
time.sleep(0.1)
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for p in processes:
p.terminate()
p.join()
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def next(self):
"""Return the next batch of data."""
return self.__next__()
def __iter__(self):
"""Return the iterator self."""
return self
def __next__(self):
"""Return the next batch of data."""
q_out = None
# Two queues implement aspect grouping, which
# reduces GPU memory by avoiding the padding of
# a huge, nearly square batch blob
while q_out is None:
if self.q1_out.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
q_out = self.q1_out
elif self.q2_out.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
q_out = self.q2_out
self.q1_out, self.q2_out = self.q2_out, self.q1_out
images, images_info = [], []
boxes_to_pack, masks_to_pack = [], []
for i in range(cfg.TRAIN.IMS_PER_BATCH):
image, image_scale, boxes, masks = q_out.get()
images.append(image)
images_info.append(list(image.shape[:2]) + [image_scale])
gt_boxes = np.zeros((boxes.shape[0], boxes.shape[1] + 1), 'float32')
gt_boxes[:, :boxes.shape[1]], gt_boxes[:, -1] = boxes, i
boxes_to_pack.append(gt_boxes)
masks_to_pack.append(masks)
return {
'data': im_list_to_blob(images),
'ims_info': np.array(images_info, 'float32'),
'gt_boxes': np.concatenate(boxes_to_pack),
'gt_masks': mask_list_to_blob(masks_to_pack),
}
...@@ -17,17 +17,18 @@ import multiprocessing ...@@ -17,17 +17,18 @@ import multiprocessing
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.datasets.example import Example from seetadet.datasets.example import Example
from lib.pycocotools import mask_utils from seetadet.pycocotools import mask_utils
from lib.utils import boxes as box_util from seetadet.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob from seetadet.utils.blob import prep_im_for_blob
from lib.utils.image import get_image_with_target_size
class DataTransformer(multiprocessing.Process): class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(DataTransformer, self).__init__() super(DataTransformer, self).__init__()
self._scales = cfg.TRAIN.SCALES
self._max_size = cfg.TRAIN.MAX_SIZE
self._seed = cfg.RNG_SEED self._seed = cfg.RNG_SEED
self._use_flipped = cfg.TRAIN.USE_FLIPPED self._use_flipped = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF self._use_diff = cfg.TRAIN.USE_DIFF
...@@ -98,9 +99,8 @@ class DataTransformer(multiprocessing.Process): ...@@ -98,9 +99,8 @@ class DataTransformer(multiprocessing.Process):
img = example.image img = example.image
# Scale # Scale
max_size = cfg.TRAIN.MAX_SIZE target_size = self._scales[np.random.randint(len(self._scales))]
target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))] img, im_scale = prep_im_for_blob(img, target_size, self._max_size)
img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
# Flip # Flip
apply_flip = False apply_flip = False
......
...@@ -18,13 +18,11 @@ import collections ...@@ -18,13 +18,11 @@ import collections
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
from lib.core.config import cfg from seetadet.algo.faster_rcnn import utils as rcnn_util
from lib.faster_rcnn.utils import map_blobs_to_outputs from seetadet.core.config import cfg
from lib.faster_rcnn.utils import map_returns_to_blobs from seetadet.utils import boxes as box_util
from lib.faster_rcnn.utils import map_rois_to_levels from seetadet.utils import mask as mask_util
from lib.utils import boxes as box_util from seetadet.utils.env import new_tensor
from lib.utils import mask as mask_util
from lib.utils.framework import new_tensor
class ProposalTarget(object): class ProposalTarget(object):
...@@ -36,10 +34,8 @@ class ProposalTarget(object): ...@@ -36,10 +34,8 @@ class ProposalTarget(object):
self.num_classes = cfg.MODEL.NUM_CLASSES self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([ self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')), ('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')), ('labels', np.array([-1], 'int64')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')), ('bbox_targets', np.zeros((1, 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('mask_targets', -np.ones((1, self.resolution, self.resolution), 'float32')), ('mask_targets', -np.ones((1, self.resolution, self.resolution), 'float32')),
]) ])
...@@ -72,67 +68,75 @@ class ProposalTarget(object): ...@@ -72,67 +68,75 @@ class ProposalTarget(object):
# Sample a batch of RoIs for training # Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs( rcnn_util.map_returns_to_blobs(
sample_rois( sample_rois(
rois, rois,
gt_boxes, gt_boxes,
gt_masks, gt_masks,
rois_per_image, rois_per_image,
fg_rois_per_image, fg_rois_per_image,
                    ims_info[ix][2],
                ), blobs, keys,
            )
        # Stack into continuous blobs
        blobs = dict((k, np.concatenate(blobs[k])) for k in blobs.keys())
        # Distribute rois into pyramids
        k_min = cfg.FPN.ROI_MIN_LEVEL
        k_max = cfg.FPN.ROI_MAX_LEVEL
        num_levels = k_max - k_min + 1
        levels = rcnn_util.map_rois_to_levels(blobs['rois'], k_min, k_max)
        lvl_blobs = rcnn_util.map_blobs_by_levels(
            blobs,
            self.defaults,
            [np.where(levels == (i + k_min))[0] for i in range(num_levels)],
        )
        rois_wide = [lvl_blobs['rois'][i] for i in range(num_levels)]
        mask_rois_wide, mask_labels_wide = [], []
        # Select the foreground RoIs only for bbox/mask branch
        for i in range(num_levels):
            inds = np.where(lvl_blobs['labels'][i] > 0)[0]
            if len(inds) > 0:
                mask_rois_wide.append(lvl_blobs['rois'][i][inds])
                mask_labels_wide.append(lvl_blobs['labels'][i][inds] - 1)
                lvl_blobs['mask_targets'][i] = lvl_blobs['mask_targets'][i][inds]
            else:
                mask_rois_wide.append(self.defaults['rois'])
                mask_labels_wide.append(np.array([0], 'int64'))
                lvl_blobs['mask_targets'][i] = self.defaults['mask_targets']
        blobs = dict((k, np.concatenate(lvl_blobs[k])) for k in blobs.keys())
        mask_labels = np.concatenate(mask_labels_wide)
        fg_inds = np.where(blobs['labels'] > 0)[0]
        bbox_cls_inds = np.arange(len(blobs['rois'])) * self.num_classes
        mask_cls_inds = np.arange(len(mask_labels)) * (self.num_classes - 1)
        # Sample a proposal randomly to avoid memory issue
        if len(fg_inds) == 0:
            fg_inds = np.random.randint(len(blobs['labels']), size=[1])
        return {
            'rois': [new_tensor(rois_wide[i]) for i in range(num_levels)],
            'mask_rois': [new_tensor(mask_rois_wide[i]) for i in range(num_levels)],
            'labels': new_tensor(blobs['labels']),
            'bbox_indices': new_tensor(bbox_cls_inds[fg_inds] + blobs['labels'][fg_inds]),
            'bbox_targets': new_tensor(blobs['bbox_targets'][fg_inds].astype('float32')),
            'bbox_anchors': new_tensor(blobs['rois'][fg_inds, 1:].astype('float32')),
            'mask_indices': new_tensor(mask_cls_inds + mask_labels),
            'mask_targets': new_tensor(blobs['mask_targets']),
        }
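For readers unfamiliar with the FPN routing above, `rcnn_util.map_rois_to_levels` assigns each RoI to a pyramid level by its scale. A minimal sketch, assuming the standard FPN heuristic (canonical level k0 = 4 at scale 224; the exact constants used by this repository are not shown in the diff):

```python
import numpy as np

def map_rois_to_levels(rois, k_min, k_max):
    """Assign each RoI to an FPN level by its scale."""
    w = rois[:, 3] - rois[:, 1] + 1  # rois: (N, 5) of (ind, x1, y1, x2, y2)
    h = rois[:, 4] - rois[:, 2] + 1
    # k = floor(k0 + log2(sqrt(w * h) / 224)), canonical level k0 = 4
    k = np.floor(4 + np.log2(np.sqrt(w * h) / 224 + 1e-8))
    return np.clip(k, k_min, k_max).astype('int64')
```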
def compute_targets(
    ex_rois,
    gt_rois,
    gt_labels,
    gt_masks,
    mask_flags,
    mask_size,
    im_scale,
):
    """Compute the bounding-box regression targets."""
...@@ -141,14 +145,8 @@ def compute_targets(
    assert gt_rois.shape[1] == 4
    # Compute bbox regression targets
    fg_inds = np.where(gt_labels > 0)[0]
    bbox_targets = box_util.bbox_transform(
        ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
    # Compute mask classification targets
    mask_shape = [mask_size] * 2
    ex_rois_ori = np.round(ex_rois / im_scale).astype(int)
...@@ -168,7 +166,7 @@ def compute_targets(
            mask=box_mask,
            size=mask_shape,
        )
    return bbox_targets, mask_targets
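The `box_util.bbox_transform` call encodes each RoI against its matched ground-truth box. A sketch of the standard R-CNN parameterization it is expected to follow (the `weights` default below is illustrative, not read from this diff):

```python
import numpy as np

def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1.)):
    """Regression deltas from example boxes to matched ground-truth boxes."""
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h
    wx, wy, ww, wh = weights
    dx = wx * (gt_cx - ex_cx) / ex_w  # normalized center shifts
    dy = wy * (gt_cy - ex_cy) / ex_h
    dw = ww * np.log(gt_w / ex_w)     # log-space size ratios
    dh = wh * np.log(gt_h / ex_h)
    return np.vstack((dx, dy, dw, dh)).transpose()
```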
def sample_rois(
...@@ -177,14 +175,13 @@ def sample_rois(
    gt_masks,
    num_rois,
    num_fg_rois,
    im_scale,
):
    """Sample a batch of RoIs comprising foreground and background examples."""
    overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4].astype('int64')

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
...@@ -209,19 +206,16 @@ def sample_rois(
    rois, labels = all_rois[keep_inds], labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    # Compute the target from RoIs
    outputs = [rois, labels]
    outputs += compute_targets(
        rois[:, 1:5],
        gt_boxes[gt_assignment[keep_inds], :4],
        labels,
        gt_masks[gt_assignment[fg_inds]],
        gt_boxes[gt_assignment[fg_inds], 5],
        cfg.MRCNN.RESOLUTION,
        im_scale,
    )
    return outputs
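For context, `num_rois` and `num_fg_rois` are typically derived from the batch settings in the config; a worked example with the defaults appearing later in this commit:

```python
# illustrative arithmetic with the config defaults from this commit
batch_size = 128     # cfg.TRAIN.BATCH_SIZE, RoIs kept per image
fg_fraction = 0.25   # cfg.TRAIN.FG_FRACTION
num_fg_rois = int(batch_size * fg_fraction)  # at most 32 foreground RoIs
num_bg_rois = batch_size - num_fg_rois       # remaining 96 slots -> background
```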
...@@ -13,19 +13,20 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import types

import dragon.vm.torch as torch
import numpy as np

from seetadet.algo.faster_rcnn import utils as rcnn_util
from seetadet.core.config import cfg
from seetadet.modeling.detector import new_detector
from seetadet.utils import env
from seetadet.utils import nms as nms_util
from seetadet.utils import time_util
from seetadet.utils import boxes as box_util
from seetadet.utils.blob import im_list_to_blob
from seetadet.utils.image import scale_image
def im_detect(detector, raw_image):
...@@ -33,50 +34,46 @@ def im_detect(detector, raw_image):
    ims, ims_scale = scale_image(raw_image)

    # Prepare blobs
    data = im_list_to_blob(ims)
    ims_info = np.array([list(data.shape[1:3]) + [im_scale]
                         for im_scale in ims_scale], dtype=np.float32)

    # Do Forward
    data = torch.from_numpy(data)
    ims_info = torch.from_numpy(ims_info)

    if not hasattr(detector, 'script_forward'):
        def script_forward(self, data, ims_info):
            return self.forward({'data': data, 'ims_info': ims_info})
        detector.script_forward = torch.jit.trace(
            func=types.MethodType(script_forward, detector),
            example_inputs=[data, ims_info],
        )

    outputs = detector.script_forward(data, ims_info)
    outputs = dict((k, outputs[k].numpy()) for k in outputs.keys())

    # Decode results
    all_scores, all_boxes, batch_inds = [], [], []
    pred_boxes = \
        box_util.bbox_transform_inv(
            outputs['rois'][:, 1:5],
            outputs['bbox_pred'],
            cfg.BBOX_REG_WEIGHTS,
        )

    for i in range(len(ims)):
        inds = np.where(outputs['rois'][:, 0].astype(np.int32) == i)[0]
        boxes = pred_boxes[inds] / ims_scale[i]
        all_scores.append(outputs['cls_prob'][inds])
        all_boxes.append(box_util.clip_tiled_boxes(boxes, raw_image.shape))
        batch_inds.append(np.ones((len(inds), 1), 'int32') * i)

    return (
        np.vstack(all_scores),
        np.vstack(all_boxes),
        np.vstack(batch_inds),
        np.array(ims_scale, 'float64'),
    )
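A hypothetical single-image driver for `im_detect`, mirroring how `test_net` below consumes it; `device`, `weights` and `raw_image` are assumed to be provided by the surrounding harness:

```python
# a hypothetical driver; not part of this file
detector = new_detector(device, weights)
scores, boxes, batch_inds, ims_scale = im_detect(detector, raw_image)
# scores: (N, num_classes); boxes: (N, 4 * num_classes), tiled per class
# and already clipped to the raw image; batch_inds maps rows to scales
```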
...@@ -85,43 +82,29 @@ def mask_detect(detector, rois):
    k_min = cfg.FPN.ROI_MIN_LEVEL
    k_max = cfg.FPN.ROI_MAX_LEVEL
    k = k_max - k_min + 1
    levels = rcnn_util.map_rois_to_levels(rois, k_min, k_max)
    level_inds = [np.where(levels == (i + k_min))[0] for i in range(k)]
    fpn_rois = rcnn_util.map_blobs_by_levels(
        {'rois': rois[:, :5]},
        {'rois': np.array([[-1, 0, 0, 1, 1]], 'float32')},
        level_inds)['rois']
    with torch.no_grad():
        mask_score = detector.rcnn.compute_mask_score(
            rois=[env.new_tensor(r.astype('float32')) for r in fpn_rois])
    nc, i = mask_score.shape[1], 0
    mask_inds = {}
    for inds in level_inds:
        for idx in inds:
            cls = int(rois[idx, 5])
            mask_inds[idx] = (i * nc + cls)
            i += 1
        if len(inds) == 0:
            i += 1
    mask_inds = list(map(mask_inds.get, sorted(mask_inds)))
    mask_inds = env.new_tensor(np.array(mask_inds, 'int64'))
    with torch.no_grad():
        mask_pred = mask_score.index_select((0, 1), mask_inds)
    return detector.rcnn.sigmoid(mask_pred).numpy().copy()
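The `i * nc + cls` bookkeeping flattens each (RoI, class) pair into a single row index, so one gather selects the class-specific mask per RoI. A NumPy analogue of the `index_select((0, 1), ...)` call, with toy shapes:

```python
import numpy as np

# toy shapes: 4 RoIs, 3 mask classes, 28x28 masks
mask_score = np.random.rand(4, 3, 28, 28)
flat = mask_score.reshape(-1, 28, 28)  # row order is i * nc + cls
picked = flat[[0 * 3 + 2, 1 * 3 + 0]]  # class 2 of RoI 0, class 0 of RoI 1
```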
def test_net(weights, num_classes, q_in, q_out, device):
...@@ -132,7 +115,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
    while True:
        idx, raw_image = q_in.get()
        if idx < 0:
            break

        rois_this_image = []
...@@ -153,17 +136,16 @@ def test_net(weights, num_classes, q_in, q_out, device):
                    (cls_boxes, cls_scores[:, np.newaxis])
                ).astype(np.float32, copy=False)
                if cfg.TEST.USE_SOFT_NMS:
                    keep = nms_util.soft_nms(
                        cls_detections,
                        thresh=cfg.TEST.NMS,
                        method=cfg.TEST.SOFT_NMS_METHOD,
                        sigma=cfg.TEST.SOFT_NMS_SIGMA,
                    )
                else:
                    keep = nms_util.nms(
                        cls_detections,
                        thresh=cfg.TEST.NMS,
                    )
                cls_detections = cls_detections[keep, :]
                cls_batch_inds = cls_batch_inds[keep]
...@@ -190,13 +172,9 @@ def test_net(weights, num_classes, q_in, q_out, device):
        q_out.put((
            idx,
            dict([('im_detect', _t['im_detect'].average_time),
                  ('mask_detect', _t['mask_detect'].average_time),
                  ('misc', _t['misc'].average_time)]),
            dict([('boxes', boxes_this_image),
                  ('masks', masks_this_image)]),
        ))
...@@ -13,7 +13,5 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from seetadet.algo.retinanet.anchor_target import AnchorTarget
from seetadet.algo.retinanet.data_loader import DataLoader
...@@ -15,12 +15,12 @@ from __future__ import print_function

import numpy as np

from seetadet.core.config import cfg
from seetadet.algo.faster_rcnn.generate_anchors import generate_anchors_v2
from seetadet.algo.faster_rcnn.utils import generate_grid_anchors
from seetadet.utils import boxes as box_util
from seetadet.utils import logger
from seetadet.utils.env import new_tensor
class AnchorTarget(object):
...@@ -47,7 +47,7 @@ class AnchorTarget(object):
                sizes=sizes,
            ))

    def __call__(self, features, gt_boxes):
        num_images = cfg.TRAIN.IMS_PER_BATCH
        gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
...@@ -67,10 +67,8 @@ class AnchorTarget(object):
        num_anchors = all_anchors.shape[0]

        # Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
        labels_wide = -np.ones((num_images, num_anchors,), 'int64')
        bbox_indices_wide, bbox_anchors_wide, bbox_targets_wide = [], [], []

        # Different from R-CNN, all anchors will be used
        inds_inside, anchors = np.arange(num_anchors), all_anchors
...@@ -81,7 +79,7 @@ class AnchorTarget(object):
            gt_boxes = gt_boxes_wide[ix]

            # label: 1 is positive, 0 is negative, -1 is don't care
            labels = np.empty((num_inside,), dtype='int64')
            labels.fill(-1)

            # Overlaps between the anchors and the gt boxes
...@@ -89,48 +87,41 @@ class AnchorTarget(object):
            argmax_overlaps = overlaps.argmax(1)
            max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]

            # Foreground: for each gt, the anchor with highest overlap
            gt_argmax_overlaps = overlaps.argmax(0)
            gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
            gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
            gt_inds = argmax_overlaps[gt_argmax_overlaps]
            labels[gt_argmax_overlaps] = gt_boxes[gt_inds, 4]

            # Foreground: above threshold IoU
            inds = max_overlaps >= cfg.RETINANET.POSITIVE_OVERLAP
            gt_inds = argmax_overlaps[inds]
            labels[inds] = gt_boxes[gt_inds, 4]
            fg_inds = np.where(labels > 0)[0]

            # Background: below threshold IoU
            labels[max_overlaps < cfg.RETINANET.NEGATIVE_OVERLAP] = 0

            # Retract the clamping if no foreground anchor survived
            if len(fg_inds) == 0:
                gt_inds = argmax_overlaps[gt_argmax_overlaps]
                labels[gt_argmax_overlaps] = gt_boxes[gt_inds, 4]
                fg_inds = np.where(labels > 0)[0]

            labels_wide[ix, inds_inside] = labels
            bbox_anchors_wide.append(anchors[fg_inds])
            bbox_indices_wide.append(fg_inds + (num_anchors * ix))
            bbox_targets_wide.append(
                box_util.bbox_transform(
                    anchors[fg_inds],
                    gt_boxes[argmax_overlaps[fg_inds], :4],
                )
            )

        return {
            'labels': new_tensor(labels_wide),
            'bbox_indices': new_tensor(np.concatenate(bbox_indices_wide)),
            'bbox_anchors': new_tensor(np.concatenate(bbox_anchors_wide).astype('float32')),
            'bbox_targets': new_tensor(np.concatenate(bbox_targets_wide).astype('float32')),
        }
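The returned `bbox_indices` address the flattened `(num_images * num_anchors, 4)` regression output, so the loss can be computed on gathered foreground rows only. A sketch of a consumer, assuming a hypothetical `smooth_l1_loss` helper and a `(num_images, num_anchors, 4)` prediction:

```python
# sketch: gather foreground rows of the regression output
bbox_pred_flat = bbox_pred.reshape(-1, 4)   # row = ix * num_anchors + anchor
fg_pred = bbox_pred_flat[bbox_indices]      # rows built by AnchorTarget above
reg_loss = smooth_l1_loss(fg_pred, bbox_targets)  # hypothetical loss helper
```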
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from seetadet.algo import faster_rcnn
from seetadet.algo import ssd
from seetadet.core.config import cfg
class DataLoader(object):
"""Provide mini-batches of data."""
def __new__(cls):
if cfg.TRAIN.MAX_SIZE > 0:
return faster_rcnn.DataLoader()
else:
return ssd.DataLoader()
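A short illustration of the dispatch above; the branch keys off `cfg.TRAIN.MAX_SIZE` exactly as written, though the surrounding training code here is assumed:

```python
# illustrative only: the factory reads cfg at construction time
from seetadet.core.config import cfg

cfg.TRAIN.MAX_SIZE = 1000   # rectangular inputs -> faster_rcnn.DataLoader
loader = DataLoader()
cfg.TRAIN.MAX_SIZE = 0      # square, SSD-style inputs -> ssd.DataLoader
loader = DataLoader()
```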
...@@ -13,66 +13,59 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import types

import dragon.vm.torch as torch
import numpy as np

from seetadet.core.config import cfg
from seetadet.modeling.detector import new_detector
from seetadet.utils import nms as nms_util
from seetadet.utils import time_util
from seetadet.utils.blob import im_list_to_blob
from seetadet.utils.image import scale_image


def ims_detect(detector, raw_images):
    """Detect images, with single or multiple scales."""
    ims, ims_scale = [], []
    for i in range(len(raw_images)):
        im, im_scale = scale_image(raw_images[i])
        ims += im
        ims_scale += im_scale

    num_scales = len(ims_scale) // len(raw_images)
    ims_shape = np.array([im.shape[:2] for im in ims])
    ims_scale = np.array(ims_scale).reshape((len(ims), -1))

    # Prepare blobs
    data = im_list_to_blob(ims)
    ims_info = np.hstack([ims_shape, ims_scale]).astype('float32')

    # Do Forward
    data = torch.from_numpy(data)
    ims_info = torch.from_numpy(ims_info)

    if not hasattr(detector, 'script_forward'):
        def script_forward(self, data, ims_info):
            return self.forward({'data': data, 'ims_info': ims_info})
        detector.script_forward = torch.jit.trace(
            func=types.MethodType(script_forward, detector),
            example_inputs=[data, ims_info],
        )

    outputs = detector.script_forward(data, ims_info)
    outputs = dict((k, outputs[k].numpy()) for k in outputs.keys())

    # Unpack results
    results = outputs['detections']
    detections = [[] for _ in range(len(raw_images))]
    for i in range(len(ims)):
        inds = np.where(results[:, 0].astype(np.int32) == i)[0]
        detections[i // num_scales].append(results[inds, 1:])
    return [np.vstack(detections[i]) for i in range(len(raw_images))]
def test_net(weights, num_classes, q_in, q_out, device):
...@@ -88,7 +81,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
        indices, raw_images = [], []
        for i in range(cfg.TEST.IMS_PER_BATCH):
            idx, raw_image = q_in.get()
            if idx < 0:
                must_stop = True
                break
            indices.append(idx)
...@@ -115,17 +108,16 @@ def test_net(weights, num_classes, q_in, q_out, device):
                    cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                if cfg.TEST.USE_SOFT_NMS:
                    keep = nms_util.soft_nms(
                        cls_detections,
                        thresh=cfg.TEST.NMS,
                        method=cfg.TEST.SOFT_NMS_METHOD,
                        sigma=cfg.TEST.SOFT_NMS_SIGMA,
                    )
                else:
                    keep = nms_util.nms(
                        cls_detections,
                        thresh=cfg.TEST.NMS,
                    )
                cls_detections = cls_detections[keep, :]
                boxes_this_image.append(cls_detections)
...@@ -133,11 +125,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
            q_out.put((
                indices[i],
                dict([('im_detect', _t['im_detect'].average_time),
                      ('misc', _t['misc'].average_time)]),
                dict([('boxes', boxes_this_image)]),
            ))
...@@ -13,11 +13,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from seetadet.algo.ssd.data_loader import DataLoader
from seetadet.algo.ssd.hard_mining import HardMining
from seetadet.algo.ssd.multibox import MultiBoxMatch
from seetadet.algo.ssd.multibox import MultiBoxTarget
from seetadet.algo.ssd.priorbox import PriorBox
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from seetadet.algo.ssd import data_transformer
from seetadet.core.config import cfg
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
dataset = get_dataset(cfg.TRAIN.DATASET)
if cfg.USE_DALI:
from seetadet.dali import ssd_pipeline as pipe
self.iterator = pipe.new_iterator(dataset.source)
else:
self.iterator = Iterator(**{
'dataset': dataset.cls,
'source': dataset.source,
'classes': dataset.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_THREADS - 1,
})
def __call__(self):
outputs = self.iterator.next()
if isinstance(outputs['data'], np.ndarray):
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class Iterator(object):
"""Iterator to return the batch of data."""
def __init__(self, **kwargs):
super(Iterator, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and \
kwargs.get('phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 32)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.q_in = mp.Queue(num_batches * self._batch_size)
self.q_out = mp.Queue(num_batches * self._batch_size)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
part_idx=part_idx, num_parts=num_parts, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.q_in
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
p = data_transformer.DataTransformer(**kwargs)
p._seed += (i + rank * self._num_transformers)
p.q_in, p.q_out = self.q_in, self.q_out
p.start()
self._transformers.append(p)
time.sleep(0.1)
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for p in processes:
p.terminate()
p.join()
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def next(self):
"""Return the next batch of data."""
return self.__next__()
def __iter__(self):
"""Return the iterator self."""
return self
def __next__(self):
"""Return the next batch of data."""
n = cfg.TRAIN.IMS_PER_BATCH
h = w = cfg.TRAIN.SCALES[0]
boxes_to_pack = []
image, boxes = self.q_out.get()
images = np.zeros((n, h, w, 3), image.dtype)
for i in range(n):
images[i] = image
gt_boxes = np.zeros((boxes.shape[0], boxes.shape[1] + 1), 'float32')
gt_boxes[:, :boxes.shape[1]], gt_boxes[:, -1] = boxes, i
boxes_to_pack.append(gt_boxes)
if i != (cfg.TRAIN.IMS_PER_BATCH - 1):
image, boxes = self.q_out.get()
boxes_to_pack = np.concatenate(boxes_to_pack)
return {'data': images, 'gt_boxes': boxes_to_pack}
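A minimal consumer of this loader, assuming the record layout gives boxes as (x1, y1, x2, y2, class) so the appended column is the image index:

```python
# a minimal consumer sketch under the layout assumption above
loader = DataLoader()
outputs = loader()
images = outputs['data']        # (IMS_PER_BATCH, H, W, 3)
gt_boxes = outputs['gt_boxes']  # (M, 6): box, class, then image index appended
```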
...@@ -14,19 +14,18 @@ from __future__ import division
from __future__ import print_function

import multiprocessing

import numpy as np

from seetadet.algo.ssd import transforms
from seetadet.core.config import cfg
from seetadet.datasets.example import Example
from seetadet.utils import boxes as box_util


class DataTransformer(multiprocessing.Process):
    def __init__(self, **kwargs):
        super(DataTransformer, self).__init__()
        self._scale = cfg.TRAIN.SCALES[0]
        self._seed = cfg.RNG_SEED
        self._mirror = cfg.TRAIN.USE_FLIPPED
        self._use_diff = cfg.TRAIN.USE_DIFF
...@@ -107,14 +106,15 @@ class DataTransformer(multiprocessing.Process):
        gt_boxes = np.empty((roi_dict['gt_classes'].size, 5), 'float32')
        gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']

        if len(gt_boxes) == 0:
            # Ignore the non-object image
            return img, gt_boxes

        # Distort => Expand => Sample => Resize
        img, gt_boxes = self.augment_image(img, gt_boxes)

        # Restore to the blob scale
        gt_boxes[:, :4] *= self._scale

        # Post-Process for image
        if img.dtype == 'uint16':
......
...@@ -15,47 +15,43 @@ from __future__ import print_function

import numpy as np

from seetadet.core.config import cfg
from seetadet.utils.env import new_tensor


class HardMining(object):
    def __call__(self, prob, labels, overlaps):
        label_shape, label_size = labels.shape, labels.size
        prob = prob.numpy().reshape((label_size, -1))
        labels, overlaps = labels.flatten(), overlaps.flatten()
        neg_ovr = cfg.SSD.OHEM.NEG_OVERLAP
        neg_ratio = cfg.SSD.OHEM.NEG_POS_RATIO

        # label ``-1`` will be ignored
        new_labels = -np.ones(labels.shape, 'int64')

        cls_loss = -np.log(
            np.maximum(
                prob[np.arange(label_size), labels],
                np.finfo(float).eps,
            )
        )

        # Filter negatives
        fg_inds = np.where(labels > 0)[0]
        neg_inds = np.where(labels == 0)[0]
        neg_overlaps = overlaps[neg_inds]
        eligible_neg_inds = np.where(neg_overlaps < neg_ovr)[0]
        neg_inds = neg_inds[eligible_neg_inds]

        # Apply mining on negatives
        neg_cls_loss = cls_loss[neg_inds]
        num_pos, num_neg = len(fg_inds), len(neg_inds)
        num_bg = min(int(num_pos * neg_ratio), num_neg)
        bg_inds = neg_inds[np.argsort(-neg_cls_loss)][:num_bg]
        new_labels[fg_inds] = labels[fg_inds]  # Keep fg indices
        new_labels[bg_inds] = 0  # Use hard negatives as bg indices

        # Feed labels to compute cls loss
        return {'labels': new_tensor(new_labels.reshape(label_shape))}
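A toy demonstration of the negative:positive ratio rule above (2 positives with `NEG_POS_RATIO = 3` keep at most the 6 highest-loss negatives):

```python
import numpy as np

# toy demonstration of the ratio rule
neg_cls_loss = np.random.rand(50)  # per-negative classification loss
num_pos, neg_ratio = 2, 3
num_bg = min(num_pos * neg_ratio, len(neg_cls_loss))
bg_inds = np.argsort(-neg_cls_loss)[:num_bg]  # hardest negatives first
```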
...@@ -15,9 +15,9 @@ from __future__ import print_function

import numpy as np

from seetadet.core.config import cfg
from seetadet.utils import boxes as box_util
from seetadet.utils.env import new_tensor


class MultiBoxMatch(object):
...@@ -47,8 +47,8 @@ class MultiBoxMatch(object):
            # Bipartite matching and assignments
            bipartite_inds = overlaps.argmax(0)
            class_assignment = gt_boxes[:, -1]
            match_inds_wide[ix, bipartite_inds] = np.arange(num_gt, dtype='int32')
            match_labels_wide[ix, bipartite_inds] = class_assignment

            # Per-prediction matching and assignments
            # Note that SSD matches each prior box only once
...@@ -56,8 +56,8 @@ class MultiBoxMatch(object):
            per_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
            gt_assignment = argmax_overlaps[per_inds]
            class_assignment = gt_boxes[gt_assignment, -1]
            match_inds_wide[ix, per_inds] = gt_assignment
            match_labels_wide[ix, per_inds] = class_assignment

        return {
            'match_inds': match_inds_wide,
...@@ -82,15 +82,7 @@ class MultiBoxTarget(object):
        num_priors, box_dim = prior_boxes.shape[:]
        gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
        bbox_indices_wide, bbox_anchors_wide, bbox_targets_wide = [], [], []

        for ix in range(num_images):
            gt_boxes = gt_boxes_wide[ix]
...@@ -106,17 +98,18 @@ class MultiBoxTarget(object):
            gt_rois = gt_boxes[gt_assignment]

            # Assign bbox targets
            bbox_anchors_wide.append(ex_rois)
            bbox_indices_wide.append(ex_inds + (num_priors * ix))
            bbox_targets_wide.append(
                box_util.bbox_transform(
                    ex_rois,
                    gt_rois,
                    cfg.BBOX_REG_WEIGHTS,
                )
            )

        return {
            'bbox_indices': new_tensor(np.concatenate(bbox_indices_wide)),
            'bbox_anchors': new_tensor(np.concatenate(bbox_anchors_wide).astype('float32')),
            'bbox_targets': new_tensor(np.concatenate(bbox_targets_wide).astype('float32')),
        }
...@@ -15,9 +15,8 @@ from __future__ import print_function

import numpy as np

from seetadet.algo.ssd.generate_anchors import generate_anchors
from seetadet.core.config import cfg


class PriorBox(object):
...@@ -29,8 +28,10 @@ class PriorBox(object):
        max_sizes = cfg.SSD.MULTIBOX.MAX_SIZES
        if len(max_sizes) > 0:
            if len(min_sizes) != len(max_sizes):
                raise ValueError(
                    'Got {} min sizes and {} max sizes.'
                    .format(len(min_sizes), len(max_sizes))
                )
        self.strides = cfg.SSD.MULTIBOX.STRIDES
        aspect_ratios = cfg.SSD.MULTIBOX.ASPECT_RATIOS
        self.base_anchors = []
...@@ -44,9 +45,14 @@ class PriorBox(object):
                    aspect_ratios[i],
                )
            )
        self.grid_anchors = None

    def __call__(self, features):
        if self.grid_anchors is not None:
            return self.grid_anchors
        self.grid_anchors = []
        for i in range(len(self.strides)):
            # 1. Generate base grids
            height, width = features[i].shape[-2:]
...@@ -61,26 +67,17 @@ class PriorBox(object):
            # Reshape to (K * A, 4) shifted anchors
            A = self.base_anchors[i].shape[0]
            D = self.base_anchors[i].shape[1]
            shifts = np.vstack((
                shift_x.ravel(),
                shift_y.ravel(),
                shift_x.ravel(),
                shift_y.ravel())
            ).transpose()
            K = shifts.shape[0]  # K = map_h * map_w
            anchors = (self.base_anchors[i].reshape((1, A, D)) +
                       shifts.reshape((1, K, D)).transpose((1, 0, 2)))
            anchors = anchors.reshape((K * A, D)).astype(np.float32)
            self.grid_anchors.append(anchors)
        self.grid_anchors = np.concatenate(self.grid_anchors)
        return self.grid_anchors
...@@ -13,26 +13,30 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import types

import cv2
import dragon.vm.torch as torch
import numpy as np

from seetadet.core.config import cfg
from seetadet.modeling.detector import new_detector
from seetadet.utils import boxes as box_util
from seetadet.utils import nms as nms_util
from seetadet.utils import time_util


def get_images(ims):
    out_size = cfg.TEST.SCALES[0]
    processed_ims, im_scales = [], []
    for im in ims:
        im_scales.append((float(out_size) / im.shape[0],
                          float(out_size) / im.shape[1]))
        processed_ims.append(
            cv2.resize(
                im, (out_size, out_size),
                interpolation=cv2.INTER_AREA,
            ))
    if ims[0].dtype == 'uint16':
        ims_blob = np.array(processed_ims, dtype='float32') / 256.
    else:
...@@ -45,34 +49,33 @@ def ims_detect(detector, ims):
    data, im_scales = get_images(ims)

    # Do Forward
    data = torch.from_numpy(data)

    if not hasattr(detector, 'script_forward'):
        def script_forward(self, data):
            return self.forward({'data': data})
        detector.script_forward = torch.jit.trace(
            func=types.MethodType(script_forward, detector),
            example_inputs=[data],
        )

    outputs = detector.script_forward(data)
    cls_prob = outputs['cls_prob'].numpy()
    bbox_pred = outputs['bbox_pred'].numpy()

    # Decode results
    batch_boxes = []
    for i in range(len(im_scales)):
        boxes = box_util.bbox_transform_inv(
            outputs['prior_boxes'],
            bbox_pred[i],
            cfg.BBOX_REG_WEIGHTS,
        )
        boxes[:, 0::2] /= im_scales[i][1]
        boxes[:, 1::2] /= im_scales[i][0]
        batch_boxes.append(box_util.clip_boxes(boxes, ims[i].shape))

    return cls_prob, batch_boxes


def test_net(weights, num_classes, q_in, q_out, device):
...@@ -88,7 +91,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
        indices, raw_images = [], []
        for i in range(cfg.TEST.IMS_PER_BATCH):
            idx, raw_image = q_in.get()
            if idx < 0:
                must_stop = True
                break
            indices.append(idx)
...@@ -116,17 +119,16 @@ def test_net(weights, num_classes, q_in, q_out, device):
                    (cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                if cfg.TEST.USE_SOFT_NMS:
                    keep = nms_util.soft_nms(
                        cls_detections,
                        thresh=cfg.TEST.NMS,
                        method=cfg.TEST.SOFT_NMS_METHOD,
                        sigma=cfg.TEST.SOFT_NMS_SIGMA,
                    )
                else:
                    keep = nms_util.nms(
                        cls_detections,
                        thresh=cfg.TEST.NMS,
                    )
                cls_detections = cls_detections[keep, :]
                boxes_this_image.append(cls_detections)
...@@ -134,11 +136,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
            q_out.put((
                indices[i],
                dict([('im_detect', _t['im_detect'].average_time),
                      ('misc', _t['misc'].average_time)]),
                dict([('boxes', boxes_this_image)]),
            ))
...@@ -22,9 +22,10 @@ import PIL.ImageEnhance
import numpy as np
import numpy.random as npr

from seetadet.core.config import cfg
from seetadet.utils import boxes as box_util
from seetadet.utils import boxes_v2 as box_util_v2
from seetadet.utils import logger


class Compose(object):
...@@ -40,43 +41,35 @@ class Compose(object):


class Distort(object):
    def __init__(self):
        self._prob = 0.5
        self._transforms = [
            (PIL.ImageEnhance.Brightness, self._prob),
            (PIL.ImageEnhance.Contrast, self._prob),
            (PIL.ImageEnhance.Color, self._prob),
        ]

    def apply(self, img, boxes=None):
        if self._prob > 0:
            img = PIL.Image.fromarray(img)
            for transform_fn, prob in self._transforms:
                if npr.uniform() < prob:
                    img = transform_fn(img)
                    img = img.enhance(1. + npr.uniform(-.4, .4))
            return np.array(img), boxes
        return img, boxes


class Expand(object):
    def __init__(self):
        self._max_ratio = 1. / cfg.TRAIN.RANDOM_SCALES[0]
        self._expand_prob = 0.5 if self._max_ratio > 1 else 0

    def apply(self, img, boxes=None):
        prob = npr.uniform()
        if prob > self._expand_prob:
            return img, boxes
        ratio = npr.uniform(1., self._max_ratio)
        if ratio == 1:
            return img, boxes
        im_h, im_w = img.shape[:2]
        expand_h, expand_w = int(im_h * ratio), int(im_w * ratio)
        h_off = int(math.floor(npr.uniform(0., expand_h - im_h)))
...@@ -99,19 +92,14 @@ class Expand(object):


class Resize(object):
    def __init__(self):
        self._target_size = (cfg.TRAIN.SCALES[0],) * 2
        self._interp_mode = [
            cv2.INTER_LINEAR,
            cv2.INTER_AREA,
            cv2.INTER_NEAREST,
            cv2.INTER_CUBIC,
            cv2.INTER_LANCZOS4,
        ]

    def apply(self, img, boxes):
        rand = npr.randint(len(self._interp_mode))
...@@ -144,7 +132,10 @@ class Sample(object):

    @classmethod
    def _compute_overlaps(cls, rand_box, gt_boxes):
        return box_util_v2.iou(
            np.expand_dims(rand_box, 0),
            gt_boxes[:, 0:4],
        )

    @classmethod
    def _generate_sample(cls, sample_param):
...@@ -162,18 +153,27 @@ class Sample(object):
        h_off = npr.uniform(0., 1. - bbox_h)
        return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])

    def _check_center(self, sample_box, gt_boxes):
        ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
        ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
        # Keep the ground-truth box whose center is in the sample box
        # Implement ``EmitConstraint.CENTER`` in the original SSD
        keep_inds = np.where((ctr_x >= sample_box[0]) & (ctr_x <= sample_box[2]) &
                             (ctr_y >= sample_box[1]) & (ctr_y <= sample_box[3]))[0]
        return len(keep_inds) > 0

    def _check_overlap(self, sample_box, gt_boxes, constraint):
        min_overlap = constraint.get('min_overlap', None)
        max_overlap = constraint.get('max_overlap', None)
        if min_overlap is None and \
                max_overlap is None:
            return True
        ovr = self._compute_overlaps(sample_box, gt_boxes).max()
        if min_overlap is not None:
            if ovr < min_overlap:
                return False
        if max_overlap is not None:
            if ovr > max_overlap:
                return False
        return True
...@@ -187,9 +187,10 @@ class Sample(object):
            sample_box = self._generate_sample(sampler)
            if sampler['min_overlap'] != 0. or \
                    sampler['max_overlap'] != 1.:
                if not self._check_overlap(sample_box, gt_boxes, sampler):
                    continue
            if not self._check_center(sample_box, gt_boxes):
                continue
            found += 1
            sample_boxes.append(sample_box)
        return sample_boxes
...@@ -206,8 +207,6 @@ class Sample(object):
        if gt_boxes is not None:
            ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
            ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
            keep_inds = np.where((ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2]) &
                                 (ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
            gt_boxes = gt_boxes[keep_inds]
......
...@@ -19,11 +19,14 @@ sys.path.append('../../')

import cv2
import numpy as np

from seetadet.algo.ssd import transforms
from seetadet.core.config import cfg

if __name__ == '__main__':
    np.random.seed(3)
    cfg.TRAIN.SCALES = [300]
    cfg.TRAIN.RANDOM_SCALES = [0.25, 1.00]

    augmentor = transforms.Compose(
        transforms.Distort(),
...@@ -36,8 +39,6 @@ if __name__ == '__main__':
        img = cv2.imread('cat.jpg')
        boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
        img, boxes = augmentor(img, boxes)
        for box in boxes:
            x1 = int(box[0] * img.shape[1])
            y1 = int(box[1] * img.shape[0])
......
...@@ -20,7 +20,7 @@ from __future__ import print_function ...@@ -20,7 +20,7 @@ from __future__ import print_function
import os.path as osp import os.path as osp
import numpy as np import numpy as np
from lib.utils.attrdict import AttrDict from seetadet.utils.attrdict import AttrDict
cfg = __C = AttrDict() cfg = __C = AttrDict()
...@@ -38,41 +38,27 @@ __C.TRAIN = AttrDict() ...@@ -38,41 +38,27 @@ __C.TRAIN = AttrDict()
# Initialize network with weights from this file # Initialize network with weights from this file
__C.TRAIN.WEIGHTS = '' __C.TRAIN.WEIGHTS = ''
# Database to train # Dataset to train
__C.TRAIN.DATABASE = '' __C.TRAIN.DATASET = ''
# The number of workers to transform data # The number of threads to load train data
__C.TRAIN.NUM_WORKERS = 3 __C.TRAIN.NUM_THREADS = 4
# Scales to use during training (can list multiple scales) # Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side # Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,) __C.TRAIN.SCALES = (300,)
# Max pixel size of the longest side of a scaled input image # Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1 # A square will be used if value < 1
__C.TRAIN.MAX_SIZE = 1000 __C.TRAIN.MAX_SIZE = 0
# Images to use per mini-batch # Images to use per mini-batch
__C.TRAIN.IMS_PER_BATCH = 1 __C.TRAIN.IMS_PER_BATCH = 1
# Minibatch size (number of regions of interest [ROIs]) # Use shuffled images during training?
__C.TRAIN.BATCH_SIZE = 128
# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25
# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5
# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0
# Use shuffle after each epoch
__C.TRAIN.USE_SHUFFLE = True __C.TRAIN.USE_SHUFFLE = True
# The number of chunks to shuffle # The number of shuffle chunks
__C.TRAIN.NUM_SHUFFLE_CHUNKS = 0 __C.TRAIN.SHUFFLE_CHUNKS = 0
# Use horizontally-flipped images during training? # Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True __C.TRAIN.USE_FLIPPED = True
...@@ -80,17 +66,25 @@ __C.TRAIN.USE_FLIPPED = True ...@@ -80,17 +66,25 @@ __C.TRAIN.USE_FLIPPED = True
# Use the difficult(under occlusion) objects # Use the difficult(under occlusion) objects
__C.TRAIN.USE_DIFF = True __C.TRAIN.USE_DIFF = True
# Overlap required between a ROI and ground-truth box in order for that ROI to # Range to jitter the image scales
# be used as a bounding-box regression training example __C.TRAIN.RANDOM_SCALES = [1., 1.]
__C.TRAIN.BBOX_THRESH = 0.5
# If True, randomly scale the image by scale range
__C.TRAIN.USE_SCALE_JITTER = False
__C.TRAIN.SCALE_JITTER_RANGE = [0.75, 1.0]
# If True, randomly distort the image by brightness, contrast, and saturation # If True, randomly distort the image by brightness, contrast, and saturation
__C.TRAIN.USE_COLOR_JITTER = False __C.TRAIN.USE_COLOR_JITTER = False
# Mini-batch size (#RoIs) for a two-stage detector
__C.TRAIN.BATCH_SIZE = 128
# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5
# Fraction of mini-batch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25
# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0
# IOU >= thresh: positive example # IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 __C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example # IOU < thresh: negative example
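The RoI sampling knobs above interact: ``BATCH_SIZE`` RoIs are drawn per image, and ``FG_FRACTION`` caps how many of them may be foreground. A sketch of the arithmetic with the defaults shown in this diff:

```python
# Illustrative only: how the RoI sampling defaults above combine.
batch_size = 128    # __C.TRAIN.BATCH_SIZE
fg_fraction = 0.25  # __C.TRAIN.FG_FRACTION

max_fg = int(batch_size * fg_fraction)  # at most 32 foreground RoIs
num_bg = batch_size - max_fg            # 96 RoIs filled from the background
# Foreground: IoU >= 0.5 (FG_THRESH)
# Background: IoU in [0.0, 0.5) (BG_THRESH_LO, BG_THRESH_HI)
print(max_fg, num_bg)  # 32 96
```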
...@@ -123,20 +117,19 @@ __C.TRAIN.RPN_STRADDLE_THRESH = 0 ...@@ -123,20 +117,19 @@ __C.TRAIN.RPN_STRADDLE_THRESH = 0
__C.TEST = AttrDict() __C.TEST = AttrDict()
# Database to test # Dataset to test
__C.TEST.DATABASE = '' __C.TEST.DATASET = ''
# Original json ground-truth file to use # Original json ground-truth file to use
# Records in the Database file will be used instead
__C.TEST.JSON_FILE = '' __C.TEST.JSON_FILE = ''
# Scales to use during testing (can list multiple scales) # Scales to use during testing (can list multiple scales)
# Each scale is the pixel size of an image's shortest side # Each scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,) __C.TEST.SCALES = (300,)
# Max pixel size of the longest side of a scaled input image # Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1 # A square will be used if value < 1
__C.TEST.MAX_SIZE = 1000 __C.TEST.MAX_SIZE = 0
# Images to use per mini-batch # Images to use per mini-batch
__C.TEST.IMS_PER_BATCH = 1 __C.TEST.IMS_PER_BATCH = 1
...@@ -217,10 +210,20 @@ __C.MODEL.CLASSES = ['__background__'] ...@@ -217,10 +210,20 @@ __C.MODEL.CLASSES = ['__background__']
# The value of ``K`` is usually set to 2 # The value of ``K`` is usually set to 2
__C.MODEL.FREEZE_AT = 2 __C.MODEL.FREEZE_AT = 2
# The variant of ReLU activation
# ('ReLU', 'ReLU6')
__C.MODEL.RELU_VARIANT = 'ReLU'
# Setting of focal loss # Setting of focal loss
__C.MODEL.FOCAL_LOSS_ALPHA = 0.25 __C.MODEL.FOCAL_LOSS_ALPHA = 0.25
__C.MODEL.FOCAL_LOSS_GAMMA = 2.0 __C.MODEL.FOCAL_LOSS_GAMMA = 2.0
# The optional loss for bbox regression
# ('NORM', 'IOU')
__C.MODEL.REG_LOSS_TYPE = 'NORM'
# Weight for bbox regression loss
__C.MODEL.REG_LOSS_WEIGHT = 1.
# Stride of the coarsest feature level	# Stride of the coarsest feature level
# This is needed so the input can be padded properly # This is needed so the input can be padded properly
__C.MODEL.COARSEST_STRIDE = 32 __C.MODEL.COARSEST_STRIDE = 32
...@@ -268,9 +271,6 @@ __C.RETINANET.ANCHOR_SCALE = 4 ...@@ -268,9 +271,6 @@ __C.RETINANET.ANCHOR_SCALE = 4
# NOTE: this doesn't include the last conv for logits # NOTE: this doesn't include the last conv for logits
__C.RETINANET.NUM_CONVS = 4 __C.RETINANET.NUM_CONVS = 4
# Weight for bbox regression loss
__C.RETINANET.BBOX_REG_WEIGHT = 1.
# During inference, #locs to select based on cls score before NMS is performed # During inference, #locs to select based on cls score before NMS is performed
__C.RETINANET.PRE_NMS_TOP_N = 5000 __C.RETINANET.PRE_NMS_TOP_N = 5000
...@@ -362,9 +362,6 @@ __C.SSD = AttrDict() ...@@ -362,9 +362,6 @@ __C.SSD = AttrDict()
# NOTE: this doesn't include the last conv for logits # NOTE: this doesn't include the last conv for logits
__C.SSD.NUM_CONVS = 0 __C.SSD.NUM_CONVS = 0
# Weight for bbox regression loss
__C.SSD.BBOX_REG_WEIGHT = 1.
# MultiBox configs # MultiBox configs
__C.SSD.MULTIBOX = AttrDict() __C.SSD.MULTIBOX = AttrDict()
__C.SSD.MULTIBOX.STRIDES = [] __C.SSD.MULTIBOX.STRIDES = []
...@@ -379,23 +376,6 @@ __C.SSD.OHEM.NEG_OVERLAP = 0.5 ...@@ -379,23 +376,6 @@ __C.SSD.OHEM.NEG_OVERLAP = 0.5
# The ratio used in hard example mining # The ratio used in hard example mining
__C.SSD.OHEM.NEG_POS_RATIO = 3.0 __C.SSD.OHEM.NEG_POS_RATIO = 3.0
# Distort the image?
__C.SSD.DISTORT = AttrDict()
__C.SSD.DISTORT.BRIGHTNESS_PROB = 0.5
__C.SSD.DISTORT.CONTRAST_PROB = 0.5
__C.SSD.DISTORT.SATURATION_PROB = 0.5
# Expand the image?
__C.SSD.EXPAND = AttrDict()
__C.SSD.EXPAND.PROB = 0.5
__C.SSD.EXPAND.MAX_RATIO = 4.0
# Resize the image?
__C.SSD.RESIZE = AttrDict()
__C.SSD.RESIZE.HEIGHT = 300
__C.SSD.RESIZE.WIDTH = 300
__C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
# Samplers # Samplers
# Format as (min_scale, max_scale, # Format as (min_scale, max_scale,
# min_aspect_ratio, max_aspect_ratio, # min_aspect_ratio, max_aspect_ratio,
...@@ -486,7 +466,7 @@ __C.SOLVER.LR_POLICY = 'steps_with_decay' ...@@ -486,7 +466,7 @@ __C.SOLVER.LR_POLICY = 'steps_with_decay'
# Momentum to use with SGD # Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9 __C.SOLVER.MOMENTUM = 0.9
# L2 regularization hyper parameters # L2 regularization for weight parameters
__C.SOLVER.WEIGHT_DECAY = 0.0001 __C.SOLVER.WEIGHT_DECAY = 0.0001
# L2 norm factor for clipping gradients # L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = -1.0 __C.SOLVER.CLIP_NORM = -1.0
...@@ -505,6 +485,9 @@ __C.NUM_GPUS = 1 ...@@ -505,6 +485,9 @@ __C.NUM_GPUS = 1
# Use NCCL for all reduce, otherwise use cuda-aware mpi # Use NCCL for all reduce, otherwise use cuda-aware mpi
__C.USE_NCCL = True __C.USE_NCCL = True
# Use DALI to load the batches of data instead of the original pipeline
__C.USE_DALI = False
# Hosts for Inter-Machine communication # Hosts for Inter-Machine communication
__C.HOSTS = [] __C.HOSTS = []
...@@ -531,9 +514,6 @@ __C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data')) ...@@ -531,9 +514,6 @@ __C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))
# Place outputs under an experiments directory # Place outputs under an experiments directory
__C.EXP_DIR = '' __C.EXP_DIR = ''
# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True
# Default GPU device id # Default GPU device id
__C.GPU_ID = 0 __C.GPU_ID = 0
......
...@@ -18,8 +18,8 @@ import shutil ...@@ -18,8 +18,8 @@ import shutil
import time import time
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.core.config import cfg_from_file from seetadet.core.config import cfg_from_file
class Coordinator(object): class Coordinator(object):
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import functools
class Registry(object):
"""The base registry class."""
def __init__(self, name):
self._name = name
self._registry = collections.OrderedDict()
def has(self, key):
return key in self._registry
def register(self, name, func=None, **kwargs):
def decorated(inner_function):
for key in (name if isinstance(
name, (tuple, list)) else [name]):
if self.has(key):
raise KeyError(
'`%s` has been registered in %s.'
% (key, self._name)
)
self._registry[key] = functools.partial(
inner_function, **kwargs)
return inner_function
if func is not None:
return decorated(func)
return decorated
def get(self, name):
if not self.has(name):
raise KeyError(
"`%s` is not registered in <%s>."
% (name, self._name)
)
return self._registry[name]
def try_get(self, name):
if self.has(name):
return self.get(name)
return None
backbones = Registry('backbones')
models = Registry('models')
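A minimal usage sketch for the registry above (the names here are invented for illustration); both the decorator form and the ``func=`` form used elsewhere in this commit are shown:

```python
demo = Registry('demo')

@demo.register('toy')  # decorator form
def build_toy():
    return 'toy-backbone'

def resnet(depth):
    return 'resnet-%d' % depth

# func= form: extra kwargs are frozen into the stored functools.partial
demo.register('toy_50', func=resnet, depth=50)

print(demo.get('toy')())        # 'toy-backbone'
print(demo.get('toy_50')())     # 'resnet-50'
print(demo.try_get('missing'))  # None
```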
...@@ -20,9 +20,9 @@ import os ...@@ -20,9 +20,9 @@ import os
import cv2 import cv2
import dragon import dragon
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.datasets.example import Example from seetadet.datasets.example import Example
from lib.datasets.factory import get_imdb from seetadet.datasets.factory import get_dataset
class _Server(object): class _Server(object):
...@@ -50,13 +50,13 @@ class _Server(object): ...@@ -50,13 +50,13 @@ class _Server(object):
class TestServer(_Server): class TestServer(_Server):
def __init__(self, output_dir): def __init__(self, output_dir):
super(TestServer, self).__init__(output_dir) super(TestServer, self).__init__(output_dir)
self.imdb = get_imdb(cfg.TEST.DATABASE) self.dataset = get_dataset(cfg.TEST.DATASET)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE) self.dataset.competition_mode(cfg.TEST.COMPETITION_MODE)
self.classes = self.imdb.classes self.classes = self.dataset.classes
self.num_images = self.imdb.num_images self.num_images = self.dataset.num_images
self.num_classes = self.imdb.num_classes self.num_classes = self.dataset.num_classes
self.data_reader = dragon.io.DataReader( self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source)) dataset=self.dataset.cls, source=self.dataset.source)
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5) self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
self.data_reader.start() self.data_reader.start()
self.gt_recs = collections.OrderedDict() self.gt_recs = collections.OrderedDict()
...@@ -81,16 +81,16 @@ class TestServer(_Server): ...@@ -81,16 +81,16 @@ class TestServer(_Server):
def evaluate_detections(self, all_boxes): def evaluate_detections(self, all_boxes):
if cfg.TEST.PROTOCOL == 'dump': if cfg.TEST.PROTOCOL == 'dump':
self.imdb.dump_detections(all_boxes, self.output_dir) self.dataset.dump_detections(all_boxes, self.output_dir)
else: else:
self.imdb.evaluate_detections( self.dataset.evaluate_detections(
all_boxes, all_boxes,
self.get_records(), self.get_records(),
self.output_dir, self.output_dir,
) )
def evaluate_segmentations(self, all_boxes, all_masks): def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations( self.dataset.evaluate_segmentations(
all_boxes, all_boxes,
all_masks, all_masks,
self.get_records(), self.get_records(),
...@@ -101,7 +101,7 @@ class TestServer(_Server): ...@@ -101,7 +101,7 @@ class TestServer(_Server):
class InferServer(_Server): class InferServer(_Server):
def __init__(self, output_dir): def __init__(self, output_dir):
super(InferServer, self).__init__(output_dir) super(InferServer, self).__init__(output_dir)
self.images_dir = cfg.TEST.DATABASE self.images_dir = cfg.TEST.DATASET
self.images = os.listdir(self.images_dir) self.images = os.listdir(self.images_dir)
self.classes = cfg.MODEL.CLASSES self.classes = cfg.MODEL.CLASSES
self.num_images = len(self.images) self.num_images = len(self.images)
......
...@@ -18,9 +18,9 @@ import multiprocessing ...@@ -18,9 +18,9 @@ import multiprocessing
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.utils import time_util from seetadet.utils import time_util
from lib.utils.vis import vis_one_image from seetadet.utils.vis import vis_one_image
def run_test_net(checkpoint, server, devices): def run_test_net(checkpoint, server, devices):
...@@ -30,8 +30,8 @@ def run_test_net(checkpoint, server, devices): ...@@ -30,8 +30,8 @@ def run_test_net(checkpoint, server, devices):
devices = devices if devices else [cfg.GPU_ID] devices = devices if devices else [cfg.GPU_ID]
num_workers = len(devices) num_workers = len(devices)
test_fn = importlib.import_module( test_module = 'seetadet.algo.%s.test' % cfg.MODEL.TYPE
'lib.%s.test' % cfg.MODEL.TYPE).test_net test_fn = importlib.import_module(test_module).test_net
_t = time_util.new_timers('im_detect', 'mask_detect', 'misc') _t = time_util.new_timers('im_detect', 'mask_detect', 'misc')
......
...@@ -22,11 +22,11 @@ import os ...@@ -22,11 +22,11 @@ import os
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.solver.sgd import SGDSolver from seetadet.solver.sgd import SGDSolver
from lib.utils import logger from seetadet.utils import logger
from lib.utils import time_util from seetadet.utils import time_util
from lib.utils.stats import SmoothedValue from seetadet.utils.stats import SmoothedValue
class SolverWrapper(object): class SolverWrapper(object):
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.dali as dali
import numpy as np
from seetadet.core.config import cfg
class DataReader(dali.ops.KPLRecordReader):
def __init__(
self,
path,
features,
pipeline,
shard_id=0,
num_shards=1,
shuffle_after_epoch=False,
shuffle_chunks=0,
aspect_grouping=False,
):
super(DataReader, self).__init__(
path=path,
features=features,
pipeline=pipeline,
shard_id=shard_id,
num_shards=num_shards,
shuffle_after_epoch=shuffle_after_epoch,
shuffle_chunks=shuffle_chunks,
)
self._aspect_grouping = aspect_grouping
self._class_to_ind = dict(zip(
cfg.MODEL.CLASSES,
range(len(cfg.MODEL.CLASSES))
))
self._queue1, self._queue2 = [], []
def feed_inputs(self):
if not self._aspect_grouping:
feed_dict = collections.defaultdict(list)
for i in range(self._pipe.batch_size):
while True:
example = self._buffer.get()
if len(example['object']) > 0:
break
data = self.example_to_data(example)
for k, v in data.items():
feed_dict[k].append(v)
for k, v in self.features.items():
self._pipe.feed_input(self.features[k], feed_dict[k])
else:
batch_size = self._pipe.batch_size
while True:
batch_data = None
if len(self._queue1) >= batch_size:
batch_data = self._queue1[:batch_size]
self._queue1 = self._queue1[batch_size:]
elif len(self._queue2) >= batch_size:
batch_data = self._queue2[:batch_size]
self._queue2 = self._queue2[batch_size:]
if batch_data is not None:
feed_dict = collections.defaultdict(list)
for data in batch_data:
for k, v in data.items():
feed_dict[k].append(v)
for k, v in self.features.items():
self._pipe.feed_input(self.features[k], feed_dict[k])
break
while True:
example = self._buffer.get()
if len(example['object']) > 0:
break
data = self.example_to_data(example)
ratio = float(data['shape'][0]) / data['shape'][1]
if ratio > 1:
self._queue1.append(data)
else:
self._queue2.append(data)
def example_to_data(self, example):
bbox_data, bbox_ratio, bbox_label = [], [], []
h, w, c = example['height'], example['width'], example['depth']
for obj in example['object']:
x1 = float(max(obj['xmin'], 0))
y1 = float(max(obj['ymin'], 0))
x2 = float(min(obj['xmax'], w - 1))
y2 = float(min(obj['ymax'], h - 1))
bbox_data.append([x1, y1, x2, y2])
bbox_ratio.append([x1 / w, y1 / h, x2 / w, y2 / h])
bbox_label.append(self._class_to_ind[obj['name']])
return {
'image': example['content'],
'shape': np.array([h, w, c], 'int64'),
'bbox/data': np.array(bbox_data, 'float32'),
'bbox/ratio': np.array(bbox_ratio, 'float32'),
'bbox/label': np.array(bbox_label, 'int32')
}
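For reference, a record with the shape ``example_to_data()`` expects, reconstructed from the keys it reads (the field values are made up):

```python
example = {
    'content': b'<encoded image bytes>',  # consumed by the DALI decoder
    'height': 480, 'width': 640, 'depth': 3,
    'object': [
        {'name': 'cat', 'xmin': 211, 'ymin': 19, 'xmax': 454, 'ymax': 470},
    ],
}
# example_to_data(example) then yields:
#   'shape'      -> [480, 640, 3]
#   'bbox/data'  -> [[211., 19., 454., 470.]]           (clipped pixel coords)
#   'bbox/ratio' -> [[0.3297, 0.0396, 0.7094, 0.9792]]  (normalized by w/h)
#   'bbox/label' -> [cfg.MODEL.CLASSES.index('cat')]
```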
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from dragon.vm import dali
from dragon.vm.dali.plugin.pytorch import DALIGenericIterator
from seetadet.core.config import cfg
from seetadet.dali.data_reader import DataReader
class Pipeline(dali.Pipeline):
def __init__(self, source):
super(Pipeline, self).__init__(
batch_size=cfg.TRAIN.IMS_PER_BATCH,
num_threads=cfg.TRAIN.NUM_THREADS,
)
random_scales = cfg.TRAIN.RANDOM_SCALES
if random_scales[1] > 1:
raise ValueError('The maximum random scale should be <= 1.')
mean_values = np.array(cfg.PIXEL_MEANS, 'int64').tolist()
self.max_size = cfg.TRAIN.MAX_SIZE
self.reader = DataReader(
path=source,
features=['image', 'shape', 'bbox/data', 'bbox/label'],
pipeline=self,
shard_id=dali.get_distributed_info()[0],
num_shards=dali.get_distributed_info()[1],
shuffle_after_epoch=cfg.TRAIN.USE_SHUFFLE,
shuffle_chunks=cfg.TRAIN.SHUFFLE_CHUNKS,
aspect_grouping=True,
)
self.decode = dali.ops.ImageDecoder()
self.resize = dali.ops.Resize(max_size=self.max_size)
self.brightness_contrast = dali.ops.BrightnessContrast()
self.hsv = dali.ops.Hsv()
self.cmn = dali.ops.CropMirrorNormalize(
mean=mean_values,
std=[1., 1., 1.],
)
self.pad = dali.ops.Pad(
axes=[1, 2],
align=cfg.MODEL.COARSEST_STRIDE
if cfg.MODEL.COARSEST_STRIDE > 0 else None,
)
with dali.device('cpu'):
self.resize_rng = dali.ops.Uniform([
cfg.TRAIN.SCALES[0] * random_scales[0],
cfg.TRAIN.SCALES[0] * random_scales[1],
])
self.twist_rng = dali.ops.Uniform([0.6, 1.4])
self.flip_rng = dali.ops.CoinFlip(0.5 if cfg.TRAIN.USE_FLIPPED else 0.)
def iter_setup(self):
self.reader.feed_inputs()
def define_graph(self):
# Read inputs from file
inputs = self.reader()
shape = inputs['shape']
bbox = inputs['bbox/data']
label = inputs['bbox/label']
# Decode image
image = self.decode(inputs['image'])
# Augment the color space
if cfg.TRAIN.USE_COLOR_JITTER:
image = self.hsv(
self.brightness_contrast(
image,
brightness=self.twist_rng(),
contrast=self.twist_rng(),
),
saturation=self.twist_rng()
)
# Resize to the target size
target_size = self.resize_rng()
image = self.resize(image, resize_shorter=target_size)
# Normalize and pad to blob shape
apply_flip = self.flip_rng()
image = self.cmn(image, mirror=apply_flip)
image = self.pad(image)
return image, bbox, label, target_size, shape, apply_flip
class Iterator(DALIGenericIterator):
def __init__(self, pipeline):
super(Iterator, self).__init__(pipeline)
@property
def handlers(self):
return ([0], self.copy_handler,), \
([1, 2, 3, 4, 5], self.gt_handler)
def next(self):
(images,), (gt_boxes, ims_info) = self.__next__()
return {'data': images, 'gt_boxes': gt_boxes, 'ims_info': ims_info}
def gt_handler(self, tensors):
def impl(box_list, labels, im_shape, target_size, max_size, flip):
num_images = len(box_list)
im_size_min = np.min(im_shape[:, :2], axis=1).astype('float32')
im_size_max = np.max(im_shape[:, :2], axis=1).astype('float32')
im_scales = target_size / im_size_min
inds = np.where(np.round(im_scales * im_size_max) > max_size)[0]
im_scales[inds] = max_size / im_size_max[inds]
box_list = [box_list[i] * im_scales[i] for i in range(num_images)]
for i in (np.where(flip > 0)[0]):
boxes = box_list[i]
boxes_flipped = box_list[i].copy()
width = im_shape[i, 1] * im_scales[i]
boxes_flipped[:, 0] = width - boxes[:, 2] - 1
boxes_flipped[:, 2] = width - boxes[:, 0] - 1
box_list[i] = boxes_flipped
im_scales = np.expand_dims(im_scales, 1)
batch_inds = [np.ones([e.size, 1]) * i for i, e in enumerate(labels)]
boxes = np.concatenate(box_list)
labels = np.expand_dims(np.concatenate(labels), axis=1)
batch_inds = np.concatenate(batch_inds)
gt_boxes = np.hstack([boxes, labels, batch_inds])
ims_info = np.hstack([im_shape[:, :2] * im_scales, im_scales])
return gt_boxes.astype('float32'), ims_info.astype('float32')
bbox, label, target_size, shape, flip = tensors
shape = shape.as_array()
return impl(
box_list=[bbox.at(i) for i in range(len(shape))],
labels=[label.at(i) for i in range(len(shape))],
im_shape=shape,
target_size=target_size.as_array().squeeze(),
max_size=self._pipe.max_size,
flip=flip.as_array()
)
def new_iterator(source):
with dali.device('cuda', cfg.GPU_ID):
return Iterator(Pipeline(source))
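A hedged usage sketch for the helper above; the dataset path is hypothetical, and the batch keys follow ``Iterator.next()``:

```python
train_source = '/data/coco_train'      # hypothetical KPLRecord path
iterator = new_iterator(train_source)  # builds Pipeline + Iterator on the GPU
batch = iterator.next()
# batch['data']     : decoded, resized, padded NCHW image batch
# batch['gt_boxes'] : float32 rows of [x1, y1, x2, y2, label, batch_index]
# batch['ims_info'] : float32 rows of [scaled_h, scaled_w, im_scale]
```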
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from dragon.vm import dali
from dragon.vm.dali.plugin.pytorch import DALIGenericIterator
from seetadet.core.config import cfg
from seetadet.dali.data_reader import DataReader
class Pipeline(dali.Pipeline):
def __init__(self, source):
super(Pipeline, self).__init__(
batch_size=cfg.TRAIN.IMS_PER_BATCH,
num_threads=cfg.TRAIN.NUM_THREADS,
)
paste_ratio = 1. / cfg.TRAIN.RANDOM_SCALES[0]
mean_values = np.array(cfg.PIXEL_MEANS, 'int64').tolist()
self.target_size = cfg.TRAIN.SCALES[0]
self.reader = DataReader(
path=source,
features=['image', 'bbox/ratio', 'bbox/label'],
pipeline=self,
shard_id=dali.get_distributed_info()[0],
num_shards=dali.get_distributed_info()[1],
shuffle_after_epoch=cfg.TRAIN.USE_SHUFFLE,
shuffle_chunks=cfg.TRAIN.SHUFFLE_CHUNKS,
)
self.decode = dali.ops.ImageDecoder()
self.brightness_contrast = dali.ops.BrightnessContrast()
self.hsv = dali.ops.Hsv()
self.paste = dali.ops.Paste(fill_value=mean_values)
self.slice = dali.ops.Slice()
self.resize = dali.ops.Resize(self.target_size, self.target_size)
self.cmn = dali.ops.CropMirrorNormalize(mean=mean_values, std=[1., 1., 1.])
with dali.device('cpu'):
self.bbox_paste = dali.ops.BBoxPaste()
self.bbox_crop = dali.ops.RandomBBoxCrop()
self.bbox_flip = dali.ops.BbFlip()
self.twist_rng = dali.ops.Uniform([0.6, 1.4])
self.paste_pos = dali.ops.Uniform((0., 1.))
self.paste_ratio = dali.ops.Uniform((0., paste_ratio - 1))
self.flip_rng = dali.ops.CoinFlip(0.5 if cfg.TRAIN.USE_FLIPPED else 0.)
def iter_setup(self):
self.reader.feed_inputs()
def define_graph(self):
# Read inputs from file
inputs = self.reader()
bbox = inputs['bbox/ratio']
label = inputs['bbox/label']
# Decode image
image = self.decode(inputs['image'])
# Augment the color space
image = self.hsv(
self.brightness_contrast(
image,
brightness=self.twist_rng(),
contrast=self.twist_rng(),
), saturation=self.twist_rng()
)
# Expand randomly to get smaller objects
pr = self.paste_ratio() * self.flip_rng() + 1.
px, py = self.paste_pos(), self.paste_pos()
image = self.paste(image, paste_x=px, paste_y=py, ratio=pr)
bbox = self.bbox_paste(bbox, paste_x=px, paste_y=py, ratio=pr)
# Sample RoIs with IoU constraint
crop_begin, crop_size, bbox, label = self.bbox_crop(bbox, label)
image = self.slice(image, crop_begin, crop_size)
# Resize image to a fixed size
image = self.resize(image)
# Normalize
apply_flip = self.flip_rng()
image = self.cmn(image, mirror=apply_flip)
bbox = self.bbox_flip(bbox, horizontal=apply_flip)
return image, bbox, label
class Iterator(DALIGenericIterator):
def __init__(self, pipeline):
super(Iterator, self).__init__(pipeline)
@property
def handlers(self):
return ([0], self.copy_handler,), ([1, 2], self.gt_handler)
def next(self):
(images,), gt_boxes = self.__next__()
return {'data': images, 'gt_boxes': gt_boxes}
def gt_handler(self, tensors):
bbox, label = tensors
num_images = self._pipe.batch_size
boxes = np.concatenate([bbox.at(i) for i in range(num_images)])
boxes[:, 0::2] *= self._pipe.target_size
boxes[:, 1::2] *= self._pipe.target_size
labels = [label.at(i) for i in range(num_images)]
batch_inds = [np.ones_like(e) * i for i, e in enumerate(labels)]
labels, batch_inds = np.concatenate(labels), np.concatenate(batch_inds)
return np.hstack([boxes, labels, batch_inds])
def new_iterator(source):
with dali.device('cuda', cfg.GPU_ID):
return Iterator(Pipeline(source))
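The expand step in ``define_graph`` above reproduces the removed ``SSD.EXPAND`` config: with the new default ``cfg.TRAIN.RANDOM_SCALES = [0.25, 1.0]``, the sampled paste ratio works out as follows (a plain-Python sketch of the graph ops):

```python
import random

upper = 1. / 0.25 - 1.                     # Uniform((0., paste_ratio - 1)) -> [0, 3]
coin = random.random() < 0.5               # CoinFlip(0.5)
pr = random.uniform(0., upper) * coin + 1.
# coin == 0 -> pr == 1.0, image left as-is
# coin == 1 -> pr in [1.0, 4.0], canvas expanded up to 4x,
#              i.e. the old SSD.EXPAND.PROB = 0.5 / MAX_RATIO = 4.0 defaults
```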
...@@ -19,11 +19,11 @@ import sys ...@@ -19,11 +19,11 @@ import sys
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.pycocotools import mask as mask_tools from seetadet.pycocotools import mask as mask_tools
from lib.pycocotools.coco import COCO from seetadet.pycocotools.coco import COCO
from lib.pycocotools.cocoeval import COCOeval from seetadet.pycocotools.cocoeval import COCOeval
from lib.utils import mask as mask_util from seetadet.utils import mask as mask_util
class COCOEvaluator(object): class COCOEvaluator(object):
......
...@@ -20,12 +20,14 @@ from __future__ import print_function ...@@ -20,12 +20,14 @@ from __future__ import print_function
import os import os
import uuid import uuid
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.datasets.coco_evaluator import COCOEvaluator from seetadet.datasets.coco_evaluator import COCOEvaluator
from lib.datasets.voc_evaluator import VOCEvaluator from seetadet.datasets.voc_evaluator import VOCEvaluator
class imdb(object): class Dataset(object):
"""The base dataset class."""
def __init__(self, source): def __init__(self, source):
self._source = source self._source = source
self._num_images = 0 self._num_images = 0
...@@ -51,6 +53,10 @@ class imdb(object): ...@@ -51,6 +53,10 @@ class imdb(object):
return self._class_to_ind return self._class_to_ind
@property @property
def cls(self):
return type(self)
@property
def comp_id(self): def comp_id(self):
return '_' + self._salt if self.config['use_salt'] else '' return '_' + self._salt if self.config['use_salt'] else ''
......
...@@ -16,7 +16,7 @@ from __future__ import print_function ...@@ -16,7 +16,7 @@ from __future__ import print_function
import cv2 import cv2
import numpy as np import numpy as np
from lib.pycocotools import mask_utils from seetadet.pycocotools import mask_utils
class Example(object): class Example(object):
......
...@@ -18,27 +18,29 @@ from __future__ import division ...@@ -18,27 +18,29 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
from lib.datasets.taas import TaaS from seetadet.datasets import kpl_record
# TaaS DataSet def get_dataset(name):
_GLOBAL_DATA_SETS = {'taas': lambda source: TaaS(source)} """Get a dataset by name."""
keys = name.split('://')
def get_imdb(name):
"""Get an imdb (image database) by name."""
keys = name.split(':')
if len(keys) >= 2:	if len(keys) == 2:
cls, source = keys[0], ':'.join(keys[1:]) cls, source = keys
if cls not in _GLOBAL_DATA_SETS: if cls not in _GLOBAL_REGISTERED_DATASET:
raise KeyError('Unknown DataSet: {}'.format(cls)) raise KeyError('Unknown dataset:', cls)
return _GLOBAL_DATA_SETS[cls](source) return _GLOBAL_REGISTERED_DATASET[cls](source)
elif os.path.exists(name): elif os.path.exists(name):
return _GLOBAL_DATA_SETS['taas'](name) return _GLOBAL_REGISTERED_DATASET['default'](name)
else: else:
raise ValueError('Illegal Database: {}' + name) raise ValueError('Illegal dataset:', name)
def list_dataset():
"""List all registered dataset."""
return _GLOBAL_REGISTERED_DATASET.keys()
def list_imdbs(): _GLOBAL_REGISTERED_DATASET = {
"""List all registered imdbs.""" 'default': lambda source:
return _GLOBAL_DATA_SETS.keys() kpl_record.KPLRecordDataset(source),
}
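How the new factory resolves a dataset spec (the paths below are hypothetical):

```python
ds1 = get_dataset('default:///data/voc_trainval')  # explicit '<cls>://<source>'
ds2 = get_dataset('/data/voc_trainval')            # bare path; must exist on disk
print(list_dataset())                              # dict_keys(['default'])
```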
...@@ -21,23 +21,26 @@ import os ...@@ -21,23 +21,26 @@ import os
import dragon import dragon
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.datasets.imdb import imdb from seetadet.datasets.dataset import Dataset
class TaaS(imdb): class KPLRecordDataset(Dataset):
def __init__(self, source): def __init__(self, source):
imdb.__init__(self, source) super(KPLRecordDataset, self).__init__(source)
self._dataset = dragon.io.SeetaRecordDataset self._num_images = self.cls(self.source).size
self._num_images = self._dataset(self.source).size
@property
def cls(self):
return dragon.io.KPLRecordDataset
def dump_detections(self, all_boxes, output_dir): def dump_detections(self, all_boxes, output_dir):
dataset = self._dataset(self.source) dataset = self.cls(self.source)
for file in ('data.data', 'data.index', 'data.meta'): for file in ('data.data', 'data.index', 'data.meta'):
file = os.path.join(output_dir, file) file = os.path.join(output_dir, file)
if os.path.exists(file): if os.path.exists(file):
os.remove(file) os.remove(file)
writer = dragon.io.SeetaRecordWriter(output_dir, dataset.protocol) writer = dragon.io.KPLRecordWriter(output_dir, dataset.protocol)
for i in range(len(dataset)): for i in range(len(dataset)):
example = dataset.get() example = dataset.get()
example['object'] = [] example['object'] = []
......
...@@ -20,11 +20,11 @@ from __future__ import print_function ...@@ -20,11 +20,11 @@ from __future__ import print_function
import cv2 import cv2
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.pycocotools import mask_utils from seetadet.pycocotools import mask_utils
from lib.utils import boxes as box_util from seetadet.utils import boxes as box_util
from lib.utils.framework import pickle from seetadet.utils.env import pickle
from lib.utils.mask import mask_overlap from seetadet.utils.mask import mask_overlap
def voc_ap(rec, prec, use_07_metric=False): def voc_ap(rec, prec, use_07_metric=False):
......
...@@ -16,8 +16,8 @@ from __future__ import print_function ...@@ -16,8 +16,8 @@ from __future__ import print_function
import os import os
import numpy as np import numpy as np
from lib.datasets import voc_eval from seetadet.datasets import voc_eval
from lib.utils.framework import pickle from seetadet.utils.env import pickle
class VOCEvaluator(object): class VOCEvaluator(object):
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Backbones
import seetadet.modeling.airnet
import seetadet.modeling.mobilenet
import seetadet.modeling.resnet
import seetadet.modeling.vgg
# Custom modules
from seetadet.modeling.fast_rcnn import FastRCNN
from seetadet.modeling.fpn import FPN
from seetadet.modeling.mask_rcnn import MaskRCNN
from seetadet.modeling.retinanet import RetinaNet
from seetadet.modeling.rpn import RPN
from seetadet.modeling.ssd import SSD
...@@ -15,17 +15,18 @@ from __future__ import print_function ...@@ -15,17 +15,18 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.modules import init from seetadet.core.registry import backbones
from lib.modules import nn from seetadet.modules import init
from seetadet.modules import nn
class WideResBlock(nn.Module): class WideResBlock(nn.Module):
def __init__(self, dim_in, dim_out, stride=1, downsample=None): def __init__(self, dim_in, dim_out, stride=1, downsample=None):
super(WideResBlock, self).__init__() super(WideResBlock, self).__init__()
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride) self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.Affine(dim_out) self.bn1 = nn.FrozenAffine(dim_out)
self.conv2 = nn.Conv3x3(dim_out, dim_out) self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.Affine(dim_out) self.bn2 = nn.FrozenAffine(dim_out)
self.downsample = downsample self.downsample = downsample
self.relu = nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
...@@ -51,15 +52,15 @@ class InceptionBlock(nn.Module): ...@@ -51,15 +52,15 @@ class InceptionBlock(nn.Module):
def __init__(self, dim_in, dim_out): def __init__(self, dim_in, dim_out):
super(InceptionBlock, self).__init__() super(InceptionBlock, self).__init__()
self.conv1 = nn.Conv1x1(dim_in, dim_out) self.conv1 = nn.Conv1x1(dim_in, dim_out)
self.bn1 = nn.Affine(dim_out) self.bn1 = nn.FrozenAffine(dim_out)
self.conv2 = nn.Conv3x3(dim_out, dim_out // 2) self.conv2 = nn.Conv3x3(dim_out, dim_out // 2)
self.bn2 = nn.Affine(dim_out // 2) self.bn2 = nn.FrozenAffine(dim_out // 2)
self.conv3a = nn.Conv3x3(dim_out // 2, dim_out) self.conv3a = nn.Conv3x3(dim_out // 2, dim_out)
self.bn3a = nn.Affine(dim_out) self.bn3a = nn.FrozenAffine(dim_out)
self.conv3b = nn.Conv3x3(dim_out, dim_out) self.conv3b = nn.Conv3x3(dim_out, dim_out)
self.bn3b = nn.Affine(dim_out) self.bn3b = nn.FrozenAffine(dim_out)
self.conv4 = nn.Conv3x3(dim_out * 3, dim_out) self.conv4 = nn.Conv3x3(dim_out * 3, dim_out)
self.bn4 = nn.Affine(dim_out) self.bn4 = nn.FrozenAffine(dim_out)
self.relu = nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
def forward(self, x): def forward(self, x):
...@@ -103,7 +104,7 @@ class AirNet(nn.Module): ...@@ -103,7 +104,7 @@ class AirNet(nn.Module):
padding=3, padding=3,
bias=False, bias=False,
) )
self.bn1 = nn.Affine(self.dim_in) self.bn1 = nn.FrozenAffine(self.dim_in)
self.relu = nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d( self.maxpool = nn.MaxPool2d(
kernel_size=2, kernel_size=2,
...@@ -127,7 +128,7 @@ class AirNet(nn.Module): ...@@ -127,7 +128,7 @@ class AirNet(nn.Module):
def make_blocks(self, dim_out, blocks, stride=1): def make_blocks(self, dim_out, blocks, stride=1):
downsample = nn.Sequential( downsample = nn.Sequential(
nn.Conv1x1(self.dim_in, dim_out, stride=stride), nn.Conv1x1(self.dim_in, dim_out, stride=stride),
nn.Affine(dim_out), nn.FrozenAffine(dim_out),
) )
layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)] layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
self.dim_in = dim_out self.dim_in = dim_out
...@@ -164,13 +165,7 @@ def airnet(num_stages): ...@@ -164,13 +165,7 @@ def airnet(num_stages):
return AirNet(blocks, num_stages) return AirNet(blocks, num_stages)
def make_airnet_(): return airnet(5) backbones.register('airnet', func=airnet, num_stages=5)
backbones.register('airnet_3b', func=airnet, num_stages=3)
backbones.register('airnet_4b', func=airnet, num_stages=4)
def make_airnet_3b(): return airnet(3) backbones.register('airnet_5b', func=airnet, num_stages=5)
def make_airnet_4b(): return airnet(4)
def make_airnet_5b(): return airnet(5)
...@@ -17,17 +17,12 @@ import collections ...@@ -17,17 +17,12 @@ import collections
import importlib import importlib
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from seetadet import modeling as models
from lib.modeling import FPN from seetadet.core.config import cfg
from lib.modeling import RPN from seetadet.core.registry import backbones
from lib.modeling import FastRCNN from seetadet.modules import nn
from lib.modeling import MaskRCNN from seetadet.modules import vision
from lib.modeling import RetinaNet from seetadet.utils import logger
from lib.modeling import SSD
from lib.modeling.factory import get_body_func
from lib.modules import nn
from lib.modules import vision
from lib.utils import logger
class Detector(nn.Module): class Detector(nn.Module):
...@@ -46,18 +41,17 @@ class Detector(nn.Module): ...@@ -46,18 +41,17 @@ class Detector(nn.Module):
# + DataLoader # + DataLoader
self.data_loader_cls = importlib.import_module( self.data_loader_cls = importlib.import_module(
'lib.{}'.format(model)).DataLoader 'seetadet.algo.{}'.format(model)).DataLoader
self.bootstrap = vision.Bootstrap() self.bootstrap = vision.Bootstrap()
# + FeatureExtractor # + FeatureExtractor
self.body = get_body_func(body)() self.body = backbones.get(body)()
feature_dims = self.body.feature_dims feature_dims = self.body.feature_dims
# + FeatureEnhancer # + FeatureEnhancer
if 'fpn' in modules: if 'fpn' in modules:
self.fpn = FPN(feature_dims) self.fpn = models.FPN(feature_dims)
feature_dims = self.fpn.feature_dims feature_dims = self.fpn.feature_dims
elif 'mbox' in modules: elif 'mbox' in modules:
pass # Placeholder pass # Placeholder
else: else:
...@@ -65,17 +59,17 @@ class Detector(nn.Module): ...@@ -65,17 +59,17 @@ class Detector(nn.Module):
# + Detection Modules # + Detection Modules
if 'rcnn' in model: if 'rcnn' in model:
self.rpn = RPN(feature_dims[0]) self.rpn = models.RPN(feature_dims[0])
if 'faster' in model: if 'faster' in model:
self.rcnn = FastRCNN(feature_dims[0]) self.rcnn = models.FastRCNN(feature_dims[0])
elif 'mask' in model: elif 'mask' in model:
self.rcnn = MaskRCNN(feature_dims[0]) self.rcnn = models.MaskRCNN(feature_dims[0])
if 'retinanet' in model: if 'retinanet' in model:
self.retinanet = RetinaNet(feature_dims[0]) self.retinanet = models.RetinaNet(feature_dims[0])
if 'ssd' in model: if 'ssd' in model:
self.ssd = SSD(feature_dims) self.ssd = models.SSD(feature_dims)
def load_weights(self, weights): def load_weights(self, weights):
"""Load the state dict of this detector. """Load the state dict of this detector.
...@@ -171,13 +165,11 @@ class Detector(nn.Module): ...@@ -171,13 +165,11 @@ class Detector(nn.Module):
return outputs return outputs
def optimize_for_inference(self): def optimize_for_inference(self):
"""Optimize the graph for the inference. """Optimize the graph for the inference."""
It usually involves the removing of BN or Affine. ###################################
""" # Merge Affine into Convolution #
################################## ###################################
# Merge Affine into Convolution #
##################################
last_module = None last_module = None
for e in self.modules(): for e in self.modules():
if isinstance(e, nn.Affine) and \ if isinstance(e, nn.Affine) and \
...@@ -195,7 +187,7 @@ class Detector(nn.Module): ...@@ -195,7 +187,7 @@ class Detector(nn.Module):
last_module = None last_module = None
for e in self.modules(): for e in self.modules():
if isinstance(e, nn.BatchNorm2d) and \ if isinstance(e, nn.BatchNorm2d) and \
nn.is_conv2d(last_module): isinstance(last_module, nn.Conv2d):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x e.forward = lambda x: x
......
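The ``optimize_for_inference`` pass above folds a frozen Affine or BatchNorm into the preceding convolution. A NumPy sketch of the standard fold it relies on (not the dragon implementation itself):

```python
import numpy as np

def fold_bn(weight, bias, gamma, beta, mean, var, eps=1e-5):
    """Fold conv -> BN into a single conv with adjusted weight/bias."""
    scale = gamma / np.sqrt(var + eps)            # per output channel
    weight = weight * scale[:, None, None, None]  # (C_out, C_in, kH, kW)
    bias = (bias - mean) * scale + beta
    return weight, bias

# A frozen Affine carries no running stats (mean = 0, var = 1), so the
# fold reduces to weight * gamma and bias * gamma + beta.
```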
...@@ -18,12 +18,12 @@ import functools ...@@ -18,12 +18,12 @@ import functools
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib import faster_rcnn from seetadet.algo import faster_rcnn
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.modules import det from seetadet.modules import det
from lib.modules import init from seetadet.modules import init
from lib.modules import nn from seetadet.modules import nn
from lib.modules import vision from seetadet.modules import vision
class FastRCNN(nn.Module): class FastRCNN(nn.Module):
...@@ -54,7 +54,11 @@ class FastRCNN(nn.Module): ...@@ -54,7 +54,11 @@ class FastRCNN(nn.Module):
'RoIAlign': vision.roi_align 'RoIAlign': vision.roi_align
}[cfg.FRCNN.ROI_XFORM_METHOD], size=cfg.FRCNN.ROI_XFORM_RESOLUTION) }[cfg.FRCNN.ROI_XFORM_METHOD], size=cfg.FRCNN.ROI_XFORM_RESOLUTION)
self.cls_loss = nn.CrossEntropyLoss() self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = nn.SmoothL1Loss() if 'IOU' in cfg.MODEL.REG_LOSS_TYPE.upper():
self.bbox_loss = nn.IoULoss(
delta_weights=cfg.BBOX_REG_WEIGHTS)
else:
self.bbox_loss = nn.SmoothL1Loss(reduction='sum')
# Compute spatial scales according to strides # Compute spatial scales according to strides
self.spatial_scales = [ self.spatial_scales = [
1. / (2 ** lvl) 1. / (2 ** lvl)
...@@ -124,15 +128,22 @@ class FastRCNN(nn.Module): ...@@ -124,15 +128,22 @@ class FastRCNN(nn.Module):
if self.training: if self.training:
# Compute rcnn losses # Compute rcnn losses
bbox_pred = outputs['bbox_pred'].view(0, -1, 4) \
.index_select((0, 1), self.data['bbox_indices'])
bbox_loss_weight = \
cfg.MODEL.REG_LOSS_WEIGHT / (
roi_features.shape[0] if isinstance(
self.bbox_loss, nn.SmoothL1Loss
) else 1.
)
outputs.update(collections.OrderedDict([ outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss( ('cls_loss', self.cls_loss(
cls_score, self.data['labels'])), cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss( ('bbox_loss', self.bbox_loss(
outputs['bbox_pred'], bbox_pred,
self.data['bbox_targets'], self.data['bbox_targets'],
self.data['bbox_inside_weights'], self.data['bbox_anchors'],
self.data['bbox_outside_weights'], ) * bbox_loss_weight),
)),
])) ]))
else: else:
# Return the rois to decode the refine boxes # Return the rois to decode the refine boxes
......
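Note the normalization in the loss block above: ``SmoothL1Loss(reduction='sum')`` is divided by the number of sampled RoIs, so ``REG_LOSS_WEIGHT`` scales a per-RoI mean, while the IoU loss is left with divisor 1 on the assumption that it normalizes internally. Illustrative numbers:

```python
num_rois = 128                  # roi_features.shape[0] (cfg.TRAIN.BATCH_SIZE)
reg_loss_weight = 1.            # cfg.MODEL.REG_LOSS_WEIGHT
smooth_l1_weight = reg_loss_weight / num_rois  # sum over RoIs -> per-RoI mean
iou_weight = reg_loss_weight / 1.              # IoU loss left unscaled
```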
...@@ -13,11 +13,11 @@ from __future__ import absolute_import ...@@ -13,11 +13,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch from dragon.vm.torch.nn import functional as nn_funcs
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.modules import init from seetadet.modules import init
from lib.modules import nn from seetadet.modules import nn
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
...@@ -36,7 +36,7 @@ class FPN(nn.Module): ...@@ -36,7 +36,7 @@ class FPN(nn.Module):
self.P.append(nn.Conv3x3(dim, dim, bias=True)) self.P.append(nn.Conv3x3(dim, dim, bias=True))
if 'rcnn' in cfg.MODEL.TYPE: if 'rcnn' in cfg.MODEL.TYPE:
self.apply_func = self.apply_on_rcnn self.apply_func = self.apply_on_rcnn
self.maxpool = nn.MaxPool2d(1, 2, ceil_mode=True) self.maxpool = nn.MaxPool2d(kernel_size=1, stride=2)
else: else:
self.apply_func = self.apply_on_generic self.apply_func = self.apply_on_generic
self.relu = nn.ReLU(inplace=False) self.relu = nn.ReLU(inplace=False)
...@@ -44,6 +44,7 @@ class FPN(nn.Module): ...@@ -44,6 +44,7 @@ class FPN(nn.Module):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim
self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True)) self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True))
self.feature_dims = [dim] self.feature_dims = [dim]
self.coarsest_stride = cfg.MODEL.COARSEST_STRIDE
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -56,14 +57,18 @@ class FPN(nn.Module): ...@@ -56,14 +57,18 @@ class FPN(nn.Module):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)] outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Apply MaxPool for higher features # Apply max pool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1])) outputs.append(self.maxpool(outputs[-1]))
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.vision.ops.nn_resize( if self.coarsest_stride > 0:
fpn_input, dsize=None, fx=2., fy=2.) upscale_output = nn_funcs.upsample(
fpn_input, scale_factor=2)
else:
upscale_output = nn_funcs.upsample(
fpn_input, size=lateral_output.shape[2:])
fpn_input = lateral_output.__iadd__(upscale_output) fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input)) outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs return outputs
...@@ -78,11 +83,15 @@ class FPN(nn.Module): ...@@ -78,11 +83,15 @@ class FPN(nn.Module):
outputs.append(self.P[i - min_lvl](extra_input)) outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl: if i != max_lvl:
extra_input = self.relu(outputs[-1]) extra_input = self.relu(outputs[-1])
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.vision.ops.nn_resize( if self.coarsest_stride > 0:
fpn_input, dsize=None, fx=2., fy=2.) upscale_output = nn_funcs.upsample(
fpn_input, scale_factor=2)
else:
upscale_output = nn_funcs.upsample(
fpn_input, size=lateral_output.shape[2:])
fpn_input = lateral_output.__iadd__(upscale_output) fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input)) outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs return outputs
......
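The branch on ``coarsest_stride`` in the FPN above exists because ``scale_factor=2`` only lines up when the input was padded so that every pyramid level halves exactly; otherwise the upsample must target the lateral feature's exact size. A shape-only illustration (stride-2 stages of a typical ResNet produce ceil(h/2) per level):

```python
# Unpadded 299-pixel input:
h = 299
print([(h + 2 ** k - 1) // 2 ** k for k in range(2, 6)])  # [75, 38, 19, 10]
# Upsampling 10 by 2x gives 20 != 19, so size= must be used.

# Padded to COARSEST_STRIDE = 32, every level halves exactly:
h = 320
print([h // 2 ** k for k in range(2, 6)])  # [80, 40, 20, 10]
# Here scale_factor=2 aligns with the lateral connection at every level.
```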
...@@ -18,12 +18,12 @@ import functools ...@@ -18,12 +18,12 @@ import functools
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib import mask_rcnn from seetadet.algo import mask_rcnn
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.modules import det from seetadet.modules import det
from lib.modules import init from seetadet.modules import init
from lib.modules import nn from seetadet.modules import nn
from lib.modules import vision from seetadet.modules import vision
class MaskRCNN(nn.Module): class MaskRCNN(nn.Module):
...@@ -65,7 +65,7 @@ class MaskRCNN(nn.Module): ...@@ -65,7 +65,7 @@ class MaskRCNN(nn.Module):
'RoIAlign': vision.roi_align, 'RoIAlign': vision.roi_align,
}[cfg.MRCNN.ROI_XFORM_METHOD], size=cfg.MRCNN.ROI_XFORM_RESOLUTION) }[cfg.MRCNN.ROI_XFORM_METHOD], size=cfg.MRCNN.ROI_XFORM_RESOLUTION)
self.cls_loss = nn.CrossEntropyLoss() self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = nn.SmoothL1Loss() self.bbox_loss = nn.SmoothL1Loss(reduction='sum')
self.mask_loss = nn.BCEWithLogitsLoss() self.mask_loss = nn.BCEWithLogitsLoss()
# Compute spatial scales according to strides # Compute spatial scales according to strides
self.spatial_scales = [ self.spatial_scales = [
...@@ -146,15 +146,14 @@ class MaskRCNN(nn.Module): ...@@ -146,15 +146,14 @@ class MaskRCNN(nn.Module):
if self.training: if self.training:
# Compute the loss of bbox branch # Compute the loss of bbox branch
bbox_pred = outputs['bbox_pred'].view(0, -1, 4) \
.index_select((0, 1), self.data['bbox_indices'])
outputs.update(collections.OrderedDict([ outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss( ('cls_loss', self.cls_loss(
cls_score, self.data['labels'])), cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss( ('bbox_loss', self.bbox_loss(
outputs['bbox_pred'], bbox_pred, self.data['bbox_targets'],
self.data['bbox_targets'], ) / roi_features.shape[0]),
self.data['bbox_inside_weights'],
self.data['bbox_outside_weights'],
)),
])) ]))
# Compute the loss of mask branch # Compute the loss of mask branch
mask_score = self.get_mask_score( mask_score = self.get_mask_score(
...@@ -171,7 +170,7 @@ class MaskRCNN(nn.Module): ...@@ -171,7 +170,7 @@ class MaskRCNN(nn.Module):
outputs['rois'] = self.data['rois'][0] outputs['rois'] = self.data['rois'][0]
# Return the classification prob # Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score) outputs['cls_prob'] = self.softmax(cls_score)
# Set a callback to decode mask from refine RoIs # Set a callback to decode mask from refined RoIs
self.compute_mask_score = \ self.compute_mask_score = \
functools.partial( functools.partial(
self.get_mask_score, self.get_mask_score,
......
...@@ -17,17 +17,18 @@ import functools ...@@ -17,17 +17,18 @@ import functools
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.modules import init from seetadet.core.registry import backbones
from lib.modules import nn from seetadet.modules import init
from lib.modules import vision from seetadet.modules import nn
from seetadet.modules import vision
def conv_triplet(dim_in, dim_out): def conv_triplet(dim_in, dim_out):
"""1x1 convolution + BN + ReLU.""" """1x1 convolution + BN + ReLU."""
return [ return [
nn.Conv2d(dim_in, dim_out, 1, bias=False), nn.Conv2d(dim_in, dim_out, 1, bias=False),
nn.Affine(dim_out), nn.FrozenAffine(dim_out),
nn.ReLU(True), nn.ReLU(True),
] ]
...@@ -42,10 +43,10 @@ def conv_quintet(dim_in, dim_out, ks, stride): ...@@ -42,10 +43,10 @@ def conv_quintet(dim_in, dim_out, ks, stride):
padding=ks // 2, padding=ks // 2,
bias=False, bias=False,
), ),
nn.Affine(dim_in), nn.FrozenAffine(dim_in),
nn.ReLU(True), nn.ReLU(True),
nn.Conv1x1(dim_in, dim_out), nn.Conv1x1(dim_in, dim_out),
nn.Affine(dim_out), nn.FrozenAffine(dim_out),
] ]
...@@ -76,7 +77,7 @@ def Stem(dim_out, stride=1): ...@@ -76,7 +77,7 @@ def Stem(dim_out, stride=1):
padding=1, padding=1,
bias=False, bias=False,
), ),
nn.Affine(dim_out), nn.FrozenAffine(dim_out),
nn.ReLU(True), nn.ReLU(True),
) )
...@@ -197,7 +198,8 @@ class NASMobileNet(nn.Module): ...@@ -197,7 +198,8 @@ class NASMobileNet(nn.Module):
return outputs return outputs
def make_mobilenet_a1(): @backbones.register('mobilenet_a1')
def mobilenet_a1():
return NASMobileNet([ return NASMobileNet([
4, 6, 6, 6, 4, 6, 6, 6,
3, 3, 4, 6, 3, 3, 4, 6,
...@@ -207,7 +209,8 @@ def make_mobilenet_a1(): ...@@ -207,7 +209,8 @@ def make_mobilenet_a1():
], Setting.PROXYLESS_MOBILE) ], Setting.PROXYLESS_MOBILE)
def make_mobilenet_v2(): @backbones.register('mobilenet_v2')
def mobilenet_v2():
return NASMobileNet([ return NASMobileNet([
1, 1, 1, 1,
1, 1, 1, 1, 1, 1,
......
...@@ -19,9 +19,10 @@ from __future__ import print_function ...@@ -19,9 +19,10 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.modules import nn from seetadet.core.registry import backbones
from lib.modules import init from seetadet.modules import nn
from seetadet.modules import init
class BasicBlock(nn.Module): class BasicBlock(nn.Module):
...@@ -35,10 +36,10 @@ class BasicBlock(nn.Module): ...@@ -35,10 +36,10 @@ class BasicBlock(nn.Module):
): ):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride) self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.Affine(dim_out) self.bn1 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.conv2 = nn.Conv3x3(dim_out, dim_out) self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.Affine(dim_out) self.bn2 = nn.FrozenAffine(dim_out)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock = dropblock
...@@ -83,11 +84,11 @@ class Bottleneck(torch.nn.Module): ...@@ -83,11 +84,11 @@ class Bottleneck(torch.nn.Module):
super(Bottleneck, self).__init__() super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction) dim = int(dim_out * self.contraction)
self.conv1 = nn.Conv1x1(dim_in, dim) self.conv1 = nn.Conv1x1(dim_in, dim)
self.bn1 = nn.Affine(dim) self.bn1 = nn.FrozenAffine(dim)
self.conv2 = nn.Conv3x3(dim, dim, stride=stride) self.conv2 = nn.Conv3x3(dim, dim, stride=stride)
self.bn2 = nn.Affine(dim) self.bn2 = nn.FrozenAffine(dim)
self.conv3 = nn.Conv1x1(dim, dim_out) self.conv3 = nn.Conv1x1(dim, dim_out)
self.bn3 = nn.Affine(dim_out) self.bn3 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock = dropblock
...@@ -132,7 +133,7 @@ class ResNet(torch.nn.Module): ...@@ -132,7 +133,7 @@ class ResNet(torch.nn.Module):
padding=3, padding=3,
bias=False, bias=False,
) )
self.bn1 = nn.Affine(self.dim_in) self.bn1 = nn.FrozenAffine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = torch.nn.MaxPool2d(
kernel_size=3, kernel_size=3,
...@@ -181,7 +182,7 @@ class ResNet(torch.nn.Module): ...@@ -181,7 +182,7 @@ class ResNet(torch.nn.Module):
if stride != 1 or self.dim_in != dim_out: if stride != 1 or self.dim_in != dim_out:
downsample = nn.Sequential( downsample = nn.Sequential(
nn.Conv1x1(self.dim_in, dim_out, stride=stride), nn.Conv1x1(self.dim_in, dim_out, stride=stride),
nn.Affine(dim_out), nn.FrozenAffine(dim_out),
) )
layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)] layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)]
self.dim_in = dim_out self.dim_in = dim_out
...@@ -194,11 +195,17 @@ class ResNet(torch.nn.Module): ...@@ -194,11 +195,17 @@ class ResNet(torch.nn.Module):
x = self.bn1(x) x = self.bn1(x)
x = self.relu(x) x = self.relu(x)
x = self.maxpool(x) x = self.maxpool(x)
outputs = [x] outputs = [x]
outputs += [self.layer1(outputs[-1])] outputs += [self.layer1(outputs[-1])]
outputs += [self.layer2(outputs[-1])] outputs += [self.layer2(outputs[-1])]
outputs += [self.layer3(outputs[-1])] outputs += [self.layer3(outputs[-1])]
outputs += [self.layer4(outputs[-1])] outputs += [self.layer4(outputs[-1])]
if self.training:
# Hold the frozen outputs if necessary
self.last_outputs = outputs
return outputs return outputs
...@@ -225,16 +232,8 @@ def resnet(depth): ...@@ -225,16 +232,8 @@ def resnet(depth):
return ResNet(block, units, filters) return ResNet(block, units, filters)
def make_resnet_18(): return resnet(18) backbones.register(['res18', 'resnet18', 'resnet_18'], func=resnet, depth=18)
backbones.register(['res34', 'resnet34', 'resnet_34'], func=resnet, depth=34)
backbones.register(['res50', 'resnet50', 'resnet_50'], func=resnet, depth=50)
def make_resnet_34(): return resnet(34) backbones.register(['res101', 'resnet101', 'resnet_101'], func=resnet, depth=101)
backbones.register(['res152', 'resnet152', 'resnet_152'], func=resnet, depth=152)
def make_resnet_50(): return resnet(50)
def make_resnet_101(): return resnet(101)
def make_resnet_152(): return resnet(152)
...@@ -17,11 +17,11 @@ import collections ...@@ -17,11 +17,11 @@ import collections
import math import math
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib import retinanet from seetadet.algo import retinanet
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.modules import det from seetadet.modules import det
from lib.modules import init from seetadet.modules import init
from lib.modules import nn from seetadet.modules import nn
class RetinaNet(nn.Module): class RetinaNet(nn.Module):
...@@ -56,7 +56,11 @@ class RetinaNet(nn.Module): ...@@ -56,7 +56,11 @@ class RetinaNet(nn.Module):
self.anchor_target = retinanet.AnchorTarget() self.anchor_target = retinanet.AnchorTarget()
self.cls_loss = nn.SigmoidFocalLoss() self.cls_loss = nn.SigmoidFocalLoss()
self.bbox_loss = nn.SmoothL1Loss(0.1111) if 'IOU' in cfg.MODEL.REG_LOSS_TYPE.upper():
self.bbox_loss = nn.IoULoss()
else:
self.bbox_loss = nn.SmoothL1Loss(0.1111)
self.centerness_loss = nn.BCEWithLogitsLoss(reduction='valid')
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -71,7 +75,8 @@ class RetinaNet(nn.Module): ...@@ -71,7 +75,8 @@ class RetinaNet(nn.Module):
# For details, See the official codes: # For details, See the official codes:
# https://github.com/facebookresearch/Detectron # https://github.com/facebookresearch/Detectron
self.cls_score.bias.fill_( self.cls_score.bias.fill_(
-math.log((1 - cfg.PRIOR_PROB) / cfg.PRIOR_PROB)) -math.log((1 - cfg.PRIOR_PROB) / cfg.PRIOR_PROB)
)
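The bias value this initialization computes, assuming ``cfg.PRIOR_PROB = 0.01`` (the Detectron default; the actual value is outside this diff):

```python
import math
prior_prob = 0.01
bias = -math.log((1 - prior_prob) / prior_prob)
print(round(bias, 3))  # -4.595; sigmoid(-4.595) ~= 0.01 initial fg probability
```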
def compute_outputs(self, features): def compute_outputs(self, features):
"""Compute the RetinaNet logits. """Compute the RetinaNet logits.
...@@ -97,48 +102,44 @@ class RetinaNet(nn.Module): ...@@ -97,48 +102,44 @@ class RetinaNet(nn.Module):
return torch.cat(cls_score_wide, dim=2), \ return torch.cat(cls_score_wide, dim=2), \
torch.cat(bbox_pred_wide, dim=2) torch.cat(bbox_pred_wide, dim=2)
else: else:
return cls_score_wide[0], bbox_pred_wide[0]	return cls_score_wide[0], bbox_pred_wide[0]
def compute_losses( def compute_losses(self, features, cls_score, bbox_pred, gt_boxes):
self,
features,
cls_score,
bbox_pred,
gt_boxes,
ims_info,
):
"""Compute the RetinaNet classification loss and regression loss. """Compute the RetinaNet classification loss and regression loss.
Parameters Parameters
---------- ----------
features : sequence of dragon.vm.torch.Tensor features : Sequence[dragon.vm.torch.Tensor]
The features of specific conv layers. The features of specific conv layers.
cls_score : dragon.vm.torch.Tensor cls_score : dragon.vm.torch.Tensor
The classification logits. The classification logits.
bbox_pred : dragon.vm.torch.Tensor bbox_pred : dragon.vm.torch.Tensor
The bbox regression logits. The bbox regression logits.
gt_boxes : numpy.ndarray gt_boxes : numpy.ndarray
The packed ground-truth boxes. The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
""" """
self.retinanet_data = \ self.data = \
self.anchor_target( self.anchor_target(
features=features, features=features,
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
ims_info=ims_info,
) )
return collections.OrderedDict([ bbox_pred = bbox_pred.permute(0, 2, 1) \
.index_select((0, 1), self.data['bbox_indices'])
outputs = collections.OrderedDict([
('cls_loss', self.cls_loss( ('cls_loss', self.cls_loss(
cls_score, self.retinanet_data['labels'])), cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss( ('bbox_loss', self.bbox_loss(
bbox_pred, bbox_pred,
self.retinanet_data['bbox_targets'], self.data['bbox_targets'],
self.retinanet_data['bbox_inside_weights'], self.data['bbox_anchors'],
self.retinanet_data['bbox_outside_weights'], ))
)),
]) ])
return outputs
def forward(self, *args, **kwargs): def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
...@@ -149,19 +150,17 @@ class RetinaNet(nn.Module): ...@@ -149,19 +150,17 @@ class RetinaNet(nn.Module):
if self.training: if self.training:
outputs.update( outputs.update(
self.compute_losses( self.compute_losses(
kwargs['features'], features=kwargs['features'],
cls_score, cls_score=cls_score,
bbox_pred, bbox_pred=bbox_pred,
kwargs['gt_boxes'], gt_boxes=kwargs['gt_boxes'],
kwargs['ims_info'],
) )
) )
else: else:
outputs['detections'] = \ outputs['detections'] = \
self.decoder( self.decoder(
kwargs['features'], kwargs['features'],
self.cls_prob(cls_score) self.cls_prob(cls_score).permute(0, 2, 1),
.permute(0, 2, 1),
bbox_pred, bbox_pred,
kwargs['ims_info'], kwargs['ims_info'],
) )
......
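The new loss path above gathers only the sampled anchors via `bbox_pred.permute(0, 2, 1).index_select((0, 1), self.data['bbox_indices'])` before computing the box loss. A numpy sketch of what this is expected to select, assuming `bbox_indices` are flat indices over the first two axes (batch, anchor) of the permuted tensor; the exact semantics of Dragon's multi-axis `index_select` are an assumption here:

```python
import numpy as np

N, C, A = 2, 4, 6                        # batch, box dims, anchors
bbox_pred = np.random.randn(N, C, A)     # network layout: (N, 4, A)
pred_nac = bbox_pred.transpose(0, 2, 1)  # permute(0, 2, 1) -> (N, A, 4)
bbox_indices = np.array([1, 7])          # flat indices over N * A
flat = pred_nac.reshape(N * A, C)
sampled = flat[bbox_indices]             # (num_sampled, 4)
print(sampled.shape)  # (2, 4)
```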
...@@ -16,10 +16,10 @@ from __future__ import print_function ...@@ -16,10 +16,10 @@ from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib import faster_rcnn from seetadet.algo import faster_rcnn
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.modules import init from seetadet.modules import init
from lib.modules import nn from seetadet.modules import nn
class RPN(nn.Module): class RPN(nn.Module):
...@@ -45,7 +45,8 @@ class RPN(nn.Module): ...@@ -45,7 +45,8 @@ class RPN(nn.Module):
self.anchor_target = faster_rcnn.AnchorTarget() self.anchor_target = faster_rcnn.AnchorTarget()
self.cls_loss = nn.BCEWithLogitsLoss() self.cls_loss = nn.BCEWithLogitsLoss()
self.bbox_loss = nn.SmoothL1Loss(0.1111) self.bbox_loss = nn.SmoothL1Loss(
beta=0.1111, reduction='sum')
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -108,21 +109,26 @@ class RPN(nn.Module): ...@@ -108,21 +109,26 @@ class RPN(nn.Module):
The information of input images. The information of input images.
""" """
self.rpn_data = \ self.data = \
self.anchor_target( self.anchor_target(
features=features, features=features,
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
ims_info=ims_info, ims_info=ims_info,
) )
bbox_pred = bbox_pred.permute(0, 2, 1) \
.index_select((0, 1), self.data['bbox_indices'])
bbox_loss_weight = 1. / (
cfg.TRAIN.RPN_BATCHSIZE *
cfg.TRAIN.IMS_PER_BATCH
)
return collections.OrderedDict([ return collections.OrderedDict([
('rpn_cls_loss', self.cls_loss( ('rpn_cls_loss', self.cls_loss(
cls_score, self.rpn_data['labels'])), cls_score, self.data['labels'])),
('rpn_bbox_loss', self.bbox_loss( ('rpn_bbox_loss', self.bbox_loss(
bbox_pred, bbox_pred,
self.rpn_data['bbox_targets'], self.data['bbox_targets'],
self.rpn_data['bbox_inside_weights'], self.data['bbox_anchors'],
self.rpn_data['bbox_outside_weights'], ) * bbox_loss_weight),
)),
]) ])
def forward(self, *args, **kwargs): def forward(self, *args, **kwargs):
......
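A note on the new `bbox_loss_weight` above: because the RPN samples a fixed number of anchors per image, `reduction='sum'` divided by `RPN_BATCHSIZE * IMS_PER_BATCH` is a mean over the sampled anchors that stays stable regardless of how many of them are positive. The values below are illustrative, not the project defaults:

```python
rpn_batchsize = 256   # sampled anchors per image (assumed)
ims_per_batch = 2     # images per training step (assumed)
bbox_loss_weight = 1. / (rpn_batchsize * ims_per_batch)
print(bbox_loss_weight)  # 0.001953125
```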
...@@ -16,10 +16,10 @@ from __future__ import print_function ...@@ -16,10 +16,10 @@ from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib import ssd from seetadet.algo import ssd
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.modules import init from seetadet.modules import init
from lib.modules import nn from seetadet.modules import nn
class SSD(nn.Module): class SSD(nn.Module):
...@@ -66,7 +66,11 @@ class SSD(nn.Module): ...@@ -66,7 +66,11 @@ class SSD(nn.Module):
self.hard_mining = ssd.HardMining() self.hard_mining = ssd.HardMining()
self.box_target = ssd.MultiBoxTarget() self.box_target = ssd.MultiBoxTarget()
self.cls_loss = nn.CrossEntropyLoss() self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = nn.SmoothL1Loss() if 'IOU' in cfg.MODEL.REG_LOSS_TYPE.upper():
self.bbox_loss = nn.IoULoss(
delta_weights=cfg.BBOX_REG_WEIGHTS)
else:
self.bbox_loss = nn.SmoothL1Loss()
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -110,8 +114,7 @@ class SSD(nn.Module): ...@@ -110,8 +114,7 @@ class SSD(nn.Module):
# Concat them if necessary # Concat them if necessary
return \ return \
torch.cat(cls_score_wide, dim=1) \ torch.cat(cls_score_wide, dim=1).view(0, -1, cfg.MODEL.NUM_CLASSES), \
.view(0, -1, cfg.MODEL.NUM_CLASSES), \
torch.cat(bbox_pred_wide, dim=1).view(0, -1, self.box_dim) torch.cat(bbox_pred_wide, dim=1).view(0, -1, self.box_dim)
def compute_losses( def compute_losses(
...@@ -160,6 +163,8 @@ class SSD(nn.Module): ...@@ -160,6 +163,8 @@ class SSD(nn.Module):
gt_boxes, gt_boxes,
) )
) )
bbox_pred = bbox_pred.index_select(
(0, 1), self.data['bbox_indices'])
return collections.OrderedDict([ return collections.OrderedDict([
# A compensating factor of 4.0 is used # A compensating factor of 4.0 is used
# As we normalize both the pos and neg samples # As we normalize both the pos and neg samples
...@@ -169,9 +174,8 @@ class SSD(nn.Module): ...@@ -169,9 +174,8 @@ class SSD(nn.Module):
('bbox_loss', self.bbox_loss( ('bbox_loss', self.bbox_loss(
bbox_pred, bbox_pred,
self.data['bbox_targets'], self.data['bbox_targets'],
self.data['bbox_inside_weights'], self.data['bbox_anchors'],
self.data['bbox_outside_weights'], ) * cfg.MODEL.REG_LOSS_WEIGHT)
)),
]) ])
def forward(self, *args, **kwargs): def forward(self, *args, **kwargs):
......
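On the "compensating factor of 4.0" mentioned in the SSD loss above: hard mining keeps negatives at roughly 3:1 against positives (the standard SSD ratio, assumed here), so normalizing the classification loss over all sampled boxes divides by about 4x the positive count. Scaling by 4.0 restores a per-positive normalization:

```python
num_pos = 16
neg_pos_ratio = 3                      # assumed SSD default
num_sampled = num_pos * (1 + neg_pos_ratio)
loss_sum = 8.0                         # illustrative summed loss
per_sample = loss_sum / num_sampled    # normalized over pos + neg
per_positive = 4.0 * per_sample        # == loss_sum / num_pos
print(per_positive == loss_sum / num_pos)  # True
```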
...@@ -13,9 +13,10 @@ from __future__ import absolute_import ...@@ -13,9 +13,10 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.modules import init from seetadet.core.registry import backbones
from lib.modules import nn from seetadet.modules import init
from seetadet.modules import nn
class VGG(nn.Module): class VGG(nn.Module):
...@@ -41,14 +42,14 @@ class VGG(nn.Module): ...@@ -41,14 +42,14 @@ class VGG(nn.Module):
if j == 0: if j == 0:
dim_in = filter_list[i] dim_in = filter_list[i]
if reduced: if reduced:
# L2Norm is redundant from the observation self.conv4_3_norm = nn.L2Normalize(filter_list[3], init=20.)
# We just keep a trainable scale
self.conv4_3_norm = nn.Affine(filter_list[3], bias=False)
self.conv4_3_norm.weight.zero_() # Zero-Init
self.fc6 = nn.Conv2d( self.fc6 = nn.Conv2d(
filter_list[-1], 1024, in_channels=filter_list[-1],
kernel_size=3, padding=6, out_channels=1024,
stride=1, dilation=6, kernel_size=3,
padding=6,
stride=1,
dilation=6,
) )
self.fc7 = nn.Conv1x1(1024, 1024, bias=True) self.fc7 = nn.Conv1x1(1024, 1024, bias=True)
self.feature_dims = [filter_list[-2], 1024] self.feature_dims = [filter_list[-2], 1024]
...@@ -142,14 +143,18 @@ class VGG(nn.Module): ...@@ -142,14 +143,18 @@ class VGG(nn.Module):
else: else:
outputs.append(x) outputs.append(x)
if self.training:
# Hold the frozen outputs if necessary
self.last_outputs = outputs
return outputs return outputs
def make_vgg_16(): def vgg_16(**kwargs):
return VGG(([2, 2, 3, 3, 3], [64, 128, 256, 512, 512])) return VGG(([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]), **kwargs)
def make_vgg_16_reduced(scale=300): def vgg_16_reduced(scale=300):
if scale == 300: if scale == 300:
extra_arch = ( extra_arch = (
[2, 2, 1, 1], [2, 2, 1, 1],
...@@ -164,11 +169,9 @@ def make_vgg_16_reduced(scale=300): ...@@ -164,11 +169,9 @@ def make_vgg_16_reduced(scale=300):
) )
else: else:
raise ValueError('Unsupported scale: {}'.format(scale)) raise ValueError('Unsupported scale: {}'.format(scale))
return VGG(([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]), return vgg_16(extra_arch=extra_arch, reduced=True)
extra_arch=extra_arch, reduced=True)
def make_vgg_16_reduced_300(): return make_vgg_16_reduced(300)
def make_vgg_16_reduced_512(): return make_vgg_16_reduced(512) backbones.register('vgg16', func=vgg_16)
backbones.register('vgg16_reduced_300', func=vgg_16_reduced, scale=300)
backbones.register('vgg16_reduced_512', func=vgg_16_reduced, scale=512)
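The reduced VGG-16 now normalizes conv4_3 features with an explicit `L2Normalize` layer (per-channel scale initialized to 20, as in ParseNet/SSD) instead of a zero-initialized Affine. A numpy sketch of that forward computation, assuming channel axis 1 and eps 1e-5 as in the module definition:

```python
import numpy as np

def l2_normalize(x, weight, eps=1e-5):
    # x: (N, C, H, W); weight: per-channel scale of shape (C,)
    norm = np.sqrt((x * x).sum(axis=1, keepdims=True)) + eps
    return (x / norm) * weight.reshape(1, -1, 1, 1)

x = np.random.randn(1, 512, 38, 38).astype('float32')
w = np.full(512, 20., dtype='float32')
y = l2_normalize(x, w)
# Each spatial location's channel vector now has L2 norm ~= 20
print(np.abs(np.linalg.norm(y[0, :, 0, 0]) - 20.) < 1e-2)  # True
```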
...@@ -13,8 +13,7 @@ from __future__ import absolute_import ...@@ -13,8 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.ssd.data_loader import DataLoader import os
from lib.ssd.hard_mining import HardMining
from lib.ssd.multibox import MultiBoxMatch from seetadet.utils import env
from lib.ssd.multibox import MultiBoxTarget env.load_library(os.path.join(os.path.dirname(__file__), '_C'))
from lib.ssd.priorbox import PriorBox
...@@ -14,21 +14,40 @@ from __future__ import division ...@@ -14,21 +14,40 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.vm.torch import nn from dragon.vm.torch import nn
from dragon.vm.torch.autograd import function from dragon.vm.torch.autograd.function import Function
from lib.core.config import cfg from seetadet.core.config import cfg
class _RetinaNetDecoder(function.Function): class _NonMaxSuppression(Function):
"""Filter out boxes that have high IoU with selected ones."""
def __init__(self, key, dev, **kwargs):
super(_NonMaxSuppression, self).__init__(key, dev, **kwargs)
self.iou_threshold = kwargs.get('iou_threshold', 0.5)
def attributes(self):
return {
'op_type': 'NonMaxSuppression',
'arguments': {'iou_threshold': self.iou_threshold}
}
def forward(self, dets):
return self.dispatch([dets], [self.alloc()])
class _RetinaNetDecoder(Function):
"""Decode predictions from RetinaNet."""
def __init__(self, key, dev, **kwargs): def __init__(self, key, dev, **kwargs):
super(_RetinaNetDecoder, self).__init__(key, dev, **kwargs) super(_RetinaNetDecoder, self).__init__(key, dev, **kwargs)
self.args = kwargs self.args = kwargs
def register_operator(self): def attributes(self):
return { return {
'op_type': 'Proposal', 'op_type': 'RetinaNetDecoder',
'arguments': { 'arguments': {
'det_type': 'RETINANET',
'strides': self.args['strides'], 'strides': self.args['strides'],
'ratios': self.args['ratios'], 'ratios': self.args['ratios'],
'scales': self.args['scales'], 'scales': self.args['scales'],
...@@ -39,20 +58,21 @@ class _RetinaNetDecoder(function.Function): ...@@ -39,20 +58,21 @@ class _RetinaNetDecoder(function.Function):
def forward(self, features, cls_prob, bbox_pred, ims_info): def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, ims_info] inputs = features + [cls_prob, bbox_pred, ims_info]
self._unify_devices(inputs[:-1]) # Skip <ims_info> self._check_device(inputs[:-1]) # Skip <ims_info>
return self.run(inputs, [self.alloc()], unify_devices=False) return self.dispatch(inputs, [self.alloc()], check_device=False)
class _RPNDecoder(Function):
"""Decode proposal regions from RPN."""
class _RPNDecoder(function.Function):
def __init__(self, key, dev, **kwargs): def __init__(self, key, dev, **kwargs):
super(_RPNDecoder, self).__init__(key, dev, **kwargs) super(_RPNDecoder, self).__init__(key, dev, **kwargs)
self.args = kwargs self.args = kwargs
def register_operator(self): def attributes(self):
return { return {
'op_type': 'Proposal', 'op_type': 'RPNDecoder',
'arguments': { 'arguments': {
'det_type': 'RCNN',
'strides': self.args['strides'], 'strides': self.args['strides'],
'ratios': self.args['ratios'], 'ratios': self.args['ratios'],
'scales': self.args['scales'], 'scales': self.args['scales'],
...@@ -69,9 +89,9 @@ class _RPNDecoder(function.Function): ...@@ -69,9 +89,9 @@ class _RPNDecoder(function.Function):
def forward(self, features, cls_prob, bbox_pred, ims_info): def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, ims_info] inputs = features + [cls_prob, bbox_pred, ims_info]
self._unify_devices(inputs[:-1]) # Skip <ims_info> self._check_device(inputs[:-1]) # Skip <ims_info>
outputs = [self.alloc() for _ in range(self.args['K'])] outputs = [self.alloc() for _ in range(self.args['K'])]
return self.run(inputs, outputs, unify_devices=False) return self.dispatch(inputs, outputs, check_device=False)
def decode_retinanet( def decode_retinanet(
...@@ -85,15 +105,15 @@ def decode_retinanet( ...@@ -85,15 +105,15 @@ def decode_retinanet(
pre_nms_top_n, pre_nms_top_n,
score_thresh, score_thresh,
): ):
return function.get( return _RetinaNetDecoder \
_RetinaNetDecoder, .instantiate(
cls_prob.device, cls_prob.device,
strides=strides, strides=strides,
ratios=ratios, ratios=ratios,
scales=scales, scales=scales,
pre_nms_top_n=pre_nms_top_n, pre_nms_top_n=pre_nms_top_n,
score_thresh=score_thresh, score_thresh=score_thresh,
).apply(features, cls_prob, bbox_pred, ims_info) ).apply(features, cls_prob, bbox_pred, ims_info)
def decode_rpn( def decode_rpn(
...@@ -114,26 +134,34 @@ def decode_rpn( ...@@ -114,26 +134,34 @@ def decode_rpn(
canonical_scale, canonical_scale,
canonical_level, canonical_level,
): ):
return function.get( return _RPNDecoder \
_RPNDecoder, .instantiate(
cls_prob.device, cls_prob.device,
K=num_outputs, K=num_outputs,
strides=strides, strides=strides,
ratios=ratios, ratios=ratios,
scales=scales, scales=scales,
pre_nms_top_n=pre_nms_top_n, pre_nms_top_n=pre_nms_top_n,
post_nms_top_n=post_nms_top_n, post_nms_top_n=post_nms_top_n,
nms_thresh=nms_thresh, nms_thresh=nms_thresh,
min_size=min_size, min_size=min_size,
min_level=min_level, min_level=min_level,
max_level=max_level, max_level=max_level,
canonical_scale=canonical_scale, canonical_scale=canonical_scale,
canonical_level=canonical_level, canonical_level=canonical_level,
).apply(features, cls_prob, bbox_pred, ims_info) ).apply(features, cls_prob, bbox_pred, ims_info)
def nms(dets, iou_threshold=0.5):
return _NonMaxSuppression \
.instantiate(
dets.device,
iou_threshold=iou_threshold,
).apply(dets)
class RetinaNetDecoder(nn.Module): class RetinaNetDecoder(nn.Module):
"""Generate pred regions from retinanet.""" """Decode predictions from retinanet."""
def __init__(self): def __init__(self):
super(RetinaNetDecoder, self).__init__() super(RetinaNetDecoder, self).__init__()
...@@ -154,7 +182,7 @@ class RetinaNetDecoder(nn.Module): ...@@ -154,7 +182,7 @@ class RetinaNetDecoder(nn.Module):
ratios=[float(e) for e in cfg.RETINANET.ASPECT_RATIOS], ratios=[float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
scales=self.scales, scales=self.scales,
pre_nms_top_n=cfg.RETINANET.PRE_NMS_TOP_N, pre_nms_top_n=cfg.RETINANET.PRE_NMS_TOP_N,
score_thresh=cfg.TEST.SCORE_THRESH, score_thresh=float(cfg.TEST.SCORE_THRESH),
) )
......
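A usage sketch for the new dispatch-based NMS op defined above. The `instantiate`/`dispatch` API is Dragon's caching mechanism: a `Function` is keyed by its attributes and device, so repeated calls reuse one registered operator. Tensor creation and the returned index layout below follow the call sites in this commit and are otherwise assumptions:

```python
import numpy as np
import dragon.vm.torch as torch
from seetadet.modules import det

dets = np.array([
    [0, 0, 10, 10, 0.9],    # x1, y1, x2, y2, score
    [1, 1, 10, 10, 0.8],    # overlaps the first box heavily
    [20, 20, 30, 30, 0.7],  # disjoint
], dtype='float32')
keep = det.nms(torch.from_numpy(dets), iou_threshold=0.5)
print(keep.numpy())  # expected: indices of kept boxes, e.g. [0, 2]
```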
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon
from dragon.vm import torch
from dragon.vm.torch import nn
from dragon.vm.torch.nn import functional
from seetadet.core.config import cfg
class FrozenAffine(object):
"""Affine transformation with weight and bias fixed."""
def __new__(cls, dim_in, bias=True, inplace=True):
return nn.Affine(
num_features=dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
class Conv1x1(object):
"""1x1 convolution."""
def __new__(cls, dim_in, dim_out, stride=1, bias=False):
return nn.Conv2d(
in_channels=dim_in,
out_channels=dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
class Conv3x3(object):
"""3x3 convolution."""
def __new__(cls, dim_in, dim_out, stride=1, dilation=1, bias=False):
return nn.Conv2d(
in_channels=dim_in,
out_channels=dim_out,
kernel_size=3,
stride=stride,
padding=1 * dilation,
bias=bias,
)
class CrossEntropyLoss(object):
"""Cross entropy loss."""
def __new__(cls):
return nn.CrossEntropyLoss(ignore_index=-1)
class IoULoss(nn.Module):
def __init__(self, reduction='mean', delta_weights=None):
super(IoULoss, self).__init__()
self.data = {} # Store the detached tensors
self.reduction = reduction
self.delta_weights = delta_weights
def transform_inv(self, boxes, deltas, name=None):
widths = boxes[:, 2] - boxes[:, 0]
heights = boxes[:, 3] - boxes[:, 1]
ctr_x = boxes[:, 0] + 0.5 * widths
ctr_y = boxes[:, 1] + 0.5 * heights
if name is not None:
self.data[name + '/widths'] = widths
self.data[name + '/heights'] = heights
dx, dy, dw, dh = torch.chunk(deltas, chunks=4, dim=1)
if self.delta_weights is not None:
wx, wy, ww, wh = self.delta_weights
dx, dy, dw, dh = dx / wx, dy / wy, dw / ww, dh / wh
pred_ctr_x = dx * widths + ctr_x
pred_ctr_y = dy * heights + ctr_y
pred_w = torch.exp(dw) * widths
pred_h = torch.exp(dh) * heights
x1 = pred_ctr_x - 0.5 * pred_w
y1 = pred_ctr_y - 0.5 * pred_h
x2 = pred_ctr_x + 0.5 * pred_w
y2 = pred_ctr_y + 0.5 * pred_h
return x1, y1, x2, y2
def forward_impl(self, input, target, anchor):
x1, y1, x2, y2 = self.transform_inv(
anchor, input, name='logits')
self.x1, self.y1, self.x2, self.y2 = \
self.transform_inv(anchor, target)
# Compute the independent area
pred_area = (x2 - x1) * (y2 - y1)
target_area = (self.x2 - self.x1) * (self.y2 - self.y1)
# Compute the intersecting area
x1_inter = torch.maximum(x1, self.x1)
y1_inter = torch.maximum(y1, self.y1)
x2_inter = torch.minimum(x2, self.x2)
y2_inter = torch.minimum(y2, self.y2)
w_inter = torch.clamp(x2_inter - x1_inter, min=0)
h_inter = torch.clamp(y2_inter - y1_inter, min=0)
area_inter = w_inter * h_inter
# Compute the enclosing area
x1_enc = torch.minimum(x1, self.x1)
y1_enc = torch.minimum(y1, self.y1)
x2_enc = torch.maximum(x2, self.x2)
y2_enc = torch.maximum(y2, self.y2)
area_enc = (x2_enc - x1_enc) * (y2_enc - y1_enc) + 1.
# Compute the differentiable IoU metric
area_union = pred_area + target_area - area_inter
iou = area_inter / (area_union + 1.)
iou_metric = iou - (area_enc - area_union) / area_enc # GIoU
# Compute the reduced loss
if self.reduction == 'sum':
return (1 - iou_metric).sum()
else:
return (1 - iou_metric).mean()
def forward(self, *inputs, **kwargs):
# Enter a new detaching scope
with dragon.eager_scope('${IOU}'):
return self.forward_impl(*inputs, **kwargs)
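A numpy cross-check of the GIoU metric computed in `forward_impl` above: giou = iou - (enclosing_area - union) / enclosing_area. The `+ 1.` stabilizers used in the module are omitted here to keep the arithmetic exact:

```python
import numpy as np

def giou(box1, box2):
    # Intersection
    x1, y1 = np.maximum(box1[:2], box2[:2])
    x2, y2 = np.minimum(box1[2:], box2[2:])
    inter = max(x2 - x1, 0) * max(y2 - y1, 0)
    # Union
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - inter
    # Smallest enclosing box
    ex1, ey1 = np.minimum(box1[:2], box2[:2])
    ex2, ey2 = np.maximum(box1[2:], box2[2:])
    enc = (ex2 - ex1) * (ey2 - ey1)
    return inter / union - (enc - union) / enc

print(giou(np.array([0., 0., 2., 2.]), np.array([1., 1., 3., 3.])))
# inter=1, union=7, enc=9 -> 1/7 - 2/9 ~= -0.0794
```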
class Identity(nn.Module):
"""Pass input to the output."""
def __init__(self, *args, **kwargs):
super(Identity, self).__init__()
_, _ = args, kwargs
def forward(self, x):
return x
class L2Normalize(nn.Module):
"""Normalize the input using L2 norm."""
def __init__(self, num_features, init=20.):
super(L2Normalize, self).__init__()
self.weight = nn.Parameter(torch.Tensor(num_features).fill_(init))
def forward(self, input):
out = functional.normalize(input, p=2, dim=1, eps=1e-5)
out = functional.affine(out, self.weight)
return out
class ReLU(object):
"""The generic ReLU activation."""
def __new__(cls, inplace=False):
return getattr(torch.nn, cfg.MODEL.RELU_VARIANT)(inplace)
class SigmoidFocalLoss(object):
"""Sigmoid focal loss."""
def __new__(cls):
return nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
)
class SmoothL1Loss(nn.Module):
"""Smoothed l1 loss."""
def __init__(self, beta=1., reduction='batch_size'):
super(SmoothL1Loss, self).__init__()
self.beta = beta
self.reduction = reduction
def forward(self, input, target, *args):
return functional.smooth_l1_loss(
input, target,
beta=self.beta,
reduction=self.reduction,
)
Affine = nn.Affine
AvgPool2d = nn.AvgPool2d
BatchNorm2d = nn.BatchNorm2d
BCEWithLogitsLoss = nn.BCEWithLogitsLoss
Conv2d = nn.Conv2d
ConvTranspose2d = nn.ConvTranspose2d
DepthwiseConv2d = nn.DepthwiseConv2d
Linear = nn.Linear
MaxPool2d = nn.MaxPool2d
Module = nn.Module
ModuleList = nn.ModuleList
Sequential = nn.Sequential
Sigmoid = nn.Sigmoid
Softmax = nn.Softmax
...@@ -13,9 +13,11 @@ from __future__ import absolute_import ...@@ -13,9 +13,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import functools
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from seetadet.core.config import cfg
def roi_align(input, boxes, spatial_scale, size): def roi_align(input, boxes, spatial_scale, size):
...@@ -35,12 +37,18 @@ def roi_pool(input, boxes, spatial_scale, size): ...@@ -35,12 +37,18 @@ def roi_pool(input, boxes, spatial_scale, size):
class Bootstrap(torch.nn.Module): class Bootstrap(torch.nn.Module):
"""Extended operator to process the images.""" """Process the input to match the computation."""
def __init__(self): def __init__(self):
super(Bootstrap, self).__init__() super(Bootstrap, self).__init__()
self.dtype = cfg.MODEL.PRECISION.lower() self.normalize_func = functools.partial(
self.mean_values = cfg.PIXEL_MEANS torch.channel_normalize,
mean=cfg.PIXEL_MEANS,
std=[1., 1., 1.],
dim=1,
dims=(0, 3, 1, 2),
dtype=cfg.MODEL.PRECISION.lower(),
)
self.dummy_buffer = torch.ones(1) self.dummy_buffer = torch.ones(1)
def _apply(self, fn): def _apply(self, fn):
...@@ -57,12 +65,13 @@ class Bootstrap(torch.nn.Module): ...@@ -57,12 +65,13 @@ class Bootstrap(torch.nn.Module):
return self.dummy_buffer.device return self.dummy_buffer.device
def forward(self, input): def forward(self, input):
if isinstance(input, torch.Tensor):
if input.size(1) <= 3:
return input
cur_device = self.device() cur_device = self.device()
if input._device != cur_device: if input._device != cur_device:
if cur_device.type == 'cpu': if cur_device.type == 'cpu':
input = input.cpu() input = input.cpu()
else: else:
input = input.cuda(cur_device.index) input = input.cuda(cur_device.index)
return torch.vision.ops.image_data( return self.normalize_func(input)
input, self.dtype, self.mean_values,
)
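What the partially-bound `torch.channel_normalize` above is expected to do, sketched in numpy under the arguments shown at the call site: cast, subtract the per-channel means, and permute NHWC to NCHW. The exact Dragon signature is taken from the call site; the mean values below are illustrative, not the configured `PIXEL_MEANS`:

```python
import numpy as np

def channel_normalize(x, mean, std, dims=(0, 3, 1, 2), dtype='float32'):
    x = x.astype(dtype)
    x = (x - np.asarray(mean, dtype)) / np.asarray(std, dtype)
    return x.transpose(dims)  # NHWC -> NCHW

im = np.random.randint(0, 255, (2, 32, 32, 3)).astype('uint8')
out = channel_normalize(im, mean=[102.98, 115.95, 122.77], std=[1., 1., 1.])
print(out.shape, out.dtype)  # (2, 3, 32, 32) float32
```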
...@@ -13,5 +13,4 @@ from __future__ import absolute_import ...@@ -13,5 +13,4 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.faster_rcnn.data_loader import DataLoader from seetadet.onnx import nodes as _
from lib.retinanet.anchor_target import AnchorTarget
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.onnx import exporter
from dragon.vm.onnx import helper
@exporter.register('RetinaNetDecoder')
def retinanet_decoder_exporter(op_def, shape_dict, ws):
node, const_tensors = exporter.translate(**locals())
node.op_type = 'ATen' # Currently not supported in ai.onnx
helper.add_attribute(node, 'op_type', 'RetinaNetDecoder')
for arg in op_def.arg:
if arg.name == 'strides':
helper.add_attribute(node, 'strides', arg.ints)
elif arg.name == 'ratios':
helper.add_attribute(node, 'ratios', arg.floats)
elif arg.name == 'scales':
helper.add_attribute(node, 'scales', arg.floats)
elif arg.name == 'pre_nms_top_n':
helper.add_attribute(node, 'pre_nms_top_n', arg.i)
elif arg.name == 'score_thresh':
helper.add_attribute(node, 'score_thresh', arg.f)
return node, const_tensors
@exporter.register('RPNDecoder')
def rpn_decoder_exporter(op_def, shape_dict, ws):
node, const_tensors = exporter.translate(**locals())
node.op_type = 'ATen' # Currently not supported in ai.onnx
helper.add_attribute(node, 'op_type', 'RPNDecoder')
for arg in op_def.arg:
if arg.name == 'strides':
helper.add_attribute(node, 'strides', arg.ints)
elif arg.name == 'ratios':
helper.add_attribute(node, 'ratios', arg.floats)
elif arg.name == 'scales':
helper.add_attribute(node, 'scales', arg.floats)
elif arg.name == 'pre_nms_top_n':
helper.add_attribute(node, 'pre_nms_top_n', arg.i)
elif arg.name == 'post_nms_top_n':
helper.add_attribute(node, 'post_nms_top_n', arg.i)
elif arg.name == 'nms_thresh':
helper.add_attribute(node, 'nms_thresh', arg.f)
elif arg.name == 'min_size':
helper.add_attribute(node, 'min_size', arg.i)
elif arg.name == 'min_level':
helper.add_attribute(node, 'min_level', arg.i)
elif arg.name == 'max_level':
helper.add_attribute(node, 'max_level', arg.i)
elif arg.name == 'canonical_scale':
helper.add_attribute(node, 'canonical_scale', arg.i)
elif arg.name == 'canonical_level':
helper.add_attribute(node, 'canonical_level', arg.i)
return node, const_tensors
...@@ -430,4 +430,4 @@ class COCO: ...@@ -430,4 +430,4 @@ class COCO:
""" """
rle = self.annToRLE(ann) rle = self.annToRLE(ann)
m = maskUtils.decode(rle) m = maskUtils.decode(rle)
return m return m
\ No newline at end of file
__author__ = 'tsungyi' __author__ = 'tsungyi'
import lib.pycocotools._mask as _mask import seetadet.pycocotools._mask as _mask
# Interface for manipulating masks stored in RLE format. # Interface for manipulating masks stored in RLE format.
# #
......
...@@ -15,8 +15,8 @@ from __future__ import print_function ...@@ -15,8 +15,8 @@ from __future__ import print_function
import numpy as np import numpy as np
from lib.pycocotools import mask as mask_tools from seetadet.pycocotools import mask as mask_tools
from lib.pycocotools.mask import frPyObjects from seetadet.pycocotools.mask import frPyObjects
def poly2rle(poly, height, width): def poly2rle(poly, height, width):
......
...@@ -15,7 +15,7 @@ from __future__ import print_function ...@@ -15,7 +15,7 @@ from __future__ import print_function
import math import math
from lib.core.config import cfg from seetadet.core.config import cfg
class _LRScheduler(object): class _LRScheduler(object):
......
...@@ -15,11 +15,11 @@ from __future__ import print_function ...@@ -15,11 +15,11 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.modeling.detector import Detector from seetadet.modeling.detector import Detector
from lib.solver import lr_scheduler from seetadet.solver import lr_scheduler
from lib.utils import framework from seetadet.utils import env
from lib.utils import time_util from seetadet.utils import time_util
class SGDSolver(object): class SGDSolver(object):
...@@ -28,7 +28,7 @@ class SGDSolver(object): ...@@ -28,7 +28,7 @@ class SGDSolver(object):
self.detector = Detector() self.detector = Detector()
# Define the optimizer and its arguments # Define the optimizer and its arguments
self.optimizer = torch.optim.SGD( self.optimizer = torch.optim.SGD(
framework.get_param_groups(self.detector), env.get_param_groups(self.detector),
lr=cfg.SOLVER.BASE_LR, lr=cfg.SOLVER.BASE_LR,
momentum=cfg.SOLVER.MOMENTUM, momentum=cfg.SOLVER.MOMENTUM,
weight_decay=cfg.SOLVER.WEIGHT_DECAY, weight_decay=cfg.SOLVER.WEIGHT_DECAY,
......
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/blob.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/blob.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.utils.image import distort_image from seetadet.utils.image import distort_image
from lib.utils.image import resize_image from seetadet.utils.image import resize_image
def im_list_to_blob(ims): def im_list_to_blob(ims):
"""Convert a list of images into a network input. """Convert a list of images into a network input.
Assume that images are not means subtracted, and with BGR order. Assume that images are not means subtracted, and with BGR order.
""" """
blob_dtype = 'uint8' if ims[0].dtype == 'uint8' else 'float32' blob_dtype = 'uint8' if ims[0].dtype == 'uint8' else 'float32'
max_shape = np.array([im.shape for im in ims]).max(axis=0) max_shape = np.array([im.shape for im in ims]).max(axis=0)
if cfg.MODEL.COARSEST_STRIDE > 0: if cfg.MODEL.COARSEST_STRIDE > 0:
stride = float(cfg.MODEL.COARSEST_STRIDE) stride = float(cfg.MODEL.COARSEST_STRIDE)
max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride) max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride)
max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride) max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride)
blob_shape = (len(ims), max_shape[0], max_shape[1], 3) blob_shape = (len(ims), max_shape[0], max_shape[1], 3)
blob = np.empty(blob_shape, blob_dtype) blob = np.empty(blob_shape, blob_dtype)
blob[:] = cfg.PIXEL_MEANS blob[:] = cfg.PIXEL_MEANS
for i, im in enumerate(ims): for i, im in enumerate(ims):
if im.dtype == 'uint16': if im.dtype == 'uint16':
im = im.astype(blob_dtype) / 256. im = im.astype(blob_dtype) / 256.
blob[i, :im.shape[0], :im.shape[1], :] = im blob[i, :im.shape[0], :im.shape[1], :] = im
return blob return blob
def mask_list_to_blob(masks): def mask_list_to_blob(masks):
"""Convert a list of masks into a network input.""" """Convert a list of masks into a network input."""
max_shape = np.array([mask.shape[1:] for mask in masks]).max(axis=0) max_shape = np.array([mask.shape[1:] for mask in masks]).max(axis=0)
num_masks = np.array([mask.shape[0] for mask in masks]).sum() num_masks = np.array([mask.shape[0] for mask in masks]).sum()
blob_shape = ((num_masks, max_shape[0], max_shape[1])) blob_shape = (num_masks, max_shape[0], max_shape[1])
blob = np.zeros(blob_shape, 'uint8') blob = np.zeros(blob_shape, 'uint8')
count = 0 count = 0
for mask in masks: for mask in masks:
n, h, w = mask.shape n, h, w = mask.shape
blob[count:count + n, :h, :w] = mask blob[count:count + n, :h, :w] = mask
count += n count += n
return blob return blob
def prep_im_for_blob(img, target_size, max_size): def prep_im_for_blob(img, target_size, max_size):
"""Scale an image for use in a blob.""" """Scale an image for use in a blob."""
im_shape, jitter = img.shape, 1. im_shape, jitter = img.shape, 1.
if cfg.TRAIN.USE_COLOR_JITTER: if cfg.TRAIN.USE_COLOR_JITTER:
img = distort_image(img) img = distort_image(img)
if max_size > 0: if max_size > 0:
# Scale image along the shortest side # Scale image along the shortest side
im_size_min = np.min(im_shape[:2]) im_size_min = np.min(im_shape[:2])
im_size_max = np.max(im_shape[:2]) im_size_max = np.max(im_shape[:2])
im_scale = float(target_size) / float(im_size_min) im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE # Prevent the biggest axis from being more than MAX_SIZE
if np.round(im_scale * im_size_max) > max_size: if np.round(im_scale * im_size_max) > max_size:
im_scale = float(max_size) / float(im_size_max) im_scale = float(max_size) / float(im_size_max)
else: else:
# Scale image along the longest side # Scale image along the longest side
im_size_max = np.max(im_shape[:2]) im_size_max = np.max(im_shape[:2])
im_scale = float(target_size) / float(im_size_max) im_scale = float(target_size) / float(im_size_max)
if cfg.TRAIN.USE_SCALE_JITTER: r = cfg.TRAIN.RANDOM_SCALES
r = cfg.TRAIN.SCALE_JITTER_RANGE jitter = r[0] + np.random.rand() * (r[1] - r[0])
jitter = r[0] + np.random.rand() * (r[1] - r[0]) im_scale *= jitter
im_scale *= jitter
return resize_image(img, im_scale, im_scale), im_scale
return resize_image(img, im_scale, im_scale), im_scale, jitter
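A quick illustration of `im_list_to_blob` above: images of different sizes are padded into one batch whose spatial dims are rounded up to `COARSEST_STRIDE`, with the padding filled by the pixel means. The shapes below are illustrative:

```python
import numpy as np

ims = [np.zeros((375, 500, 3), 'uint8'), np.zeros((480, 368, 3), 'uint8')]
stride = 32.
max_shape = np.array([im.shape for im in ims]).max(axis=0)  # (480, 500, 3)
padded_h = int(np.ceil(max_shape[0] / stride) * stride)     # 480
padded_w = int(np.ceil(max_shape[1] / stride) * stride)     # 512
print(padded_h, padded_w)  # 480 512
```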
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# ------------------------------------------------------------
"""Box utilities for original coordinates."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from seetadet.utils import cython_bbox
def bbox_overlaps(boxes1, boxes2):
"""Compute the overlaps between two group of boxes."""
return cython_bbox.bbox_overlaps(
np.ascontiguousarray(boxes1, dtype=np.float64),
np.ascontiguousarray(boxes2, dtype=np.float64),
)
def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1.)):
"""Transform the boxes to the regression targets."""
ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.
ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.
ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.
gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.
gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
wx, wy, ww, wh = weights
targets = [wx * (gt_ctr_x - ex_ctr_x) / ex_widths]
targets += [wy * (gt_ctr_y - ex_ctr_y) / ex_heights]
targets += [ww * np.log(gt_widths / ex_widths)]
targets += [wh * np.log(gt_heights / ex_heights)]
return np.vstack(targets).transpose()
def bbox_centerness(ex_rois, gt_rois):
"""Compute centerness of the boxes to ground-truth."""
ex_ctr_x = (ex_rois[:, 2] + ex_rois[:, 0]) / 2
ex_ctr_y = (ex_rois[:, 3] + ex_rois[:, 1]) / 2
l = ex_ctr_x - gt_rois[:, 0]
t = ex_ctr_y - gt_rois[:, 1]
r = gt_rois[:, 2] - ex_ctr_x
b = gt_rois[:, 3] - ex_ctr_y
centerness = \
(np.minimum(l, r) / np.maximum(l, r)) * \
(np.minimum(t, b) / np.maximum(t, b))
min_dist = np.stack([l, t, r, b], axis=1).min(axis=1)
keep_inds = np.where(min_dist > 0.01)[0]
discard_inds = np.where(min_dist <= 0.01)[0]
centerness[keep_inds] = np.sqrt(centerness[keep_inds])
centerness[discard_inds] = -1
return centerness, keep_inds, discard_inds
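A worked example of `bbox_centerness` above: for a point at the center of its ground-truth box the distances l, t, r, b are equal, so centerness is sqrt(1 * 1) = 1; it decays toward 0 as the point approaches a box edge:

```python
import numpy as np

gt = np.array([0., 0., 10., 10.])
for cx in (5., 8., 9.9):
    l, r = cx - gt[0], gt[2] - cx
    t, b = 5. - gt[1], gt[3] - 5.        # keep y centered
    c = np.sqrt((min(l, r) / max(l, r)) * (min(t, b) / max(t, b)))
    print(round(c, 2))  # 1.0, 0.5, 0.1
```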
def bbox_transform_inv(boxes, deltas, weights=(1., 1., 1., 1.)):
"""Decode the final boxes according to the deltas."""
if boxes.shape[0] == 0:
return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
boxes = boxes.astype(deltas.dtype, copy=False)
widths = boxes[:, 2] - boxes[:, 0] + 1.
heights = boxes[:, 3] - boxes[:, 1] + 1.
ctr_x = boxes[:, 0] + 0.5 * widths
ctr_y = boxes[:, 1] + 0.5 * heights
wx, wy, ww, wh = weights
dx = deltas[:, 0::4] / wx
dy = deltas[:, 1::4] / wy
dw = deltas[:, 2::4] / ww
dh = deltas[:, 3::4] / wh
pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
pred_w = np.exp(dw) * widths[:, np.newaxis]
pred_h = np.exp(dh) * heights[:, np.newaxis]
pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # x1
pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # y1
pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 # x2
pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 # y2
return pred_boxes
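Round-trip check for the two functions above (assuming both are in scope in this module): encoding a ground-truth box against an anchor with `bbox_transform` and decoding with `bbox_transform_inv` recovers the ground-truth box, since both use the same legacy "+ 1" box widths:

```python
import numpy as np

ex = np.array([[10., 10., 50., 50.]])   # anchor
gt = np.array([[12., 8., 60., 44.]])    # target
deltas = bbox_transform(ex, gt)
rec = bbox_transform_inv(ex, deltas)
print(np.allclose(rec, gt))  # True
```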
def clip_boxes(boxes, im_shape):
# x1 >= 0
boxes[:, 0] = np.maximum(np.minimum(boxes[:, 0], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1] = np.maximum(np.minimum(boxes[:, 1], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2] = np.maximum(np.minimum(boxes[:, 2], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3] = np.maximum(np.minimum(boxes[:, 3], im_shape[0] - 1), 0)
return boxes
def clip_tiled_boxes(boxes, im_shape):
# x1 >= 0
boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
return boxes
def dismantle_boxes(gt_boxes, num_images):
"""Dismantle the packed ground-truth boxes."""
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == i)[0]
][:, :-1] for i in range(num_images)
]
def expand_boxes(boxes, scale):
"""Expand an array of boxes by a given scale."""
w_half = (boxes[:, 2] - boxes[:, 0]) * .5
h_half = (boxes[:, 3] - boxes[:, 1]) * .5
x_c = (boxes[:, 2] + boxes[:, 0]) * .5
y_c = (boxes[:, 3] + boxes[:, 1]) * .5
w_half *= scale
h_half *= scale
boxes_exp = np.zeros(boxes.shape)
boxes_exp[:, 0] = x_c - w_half
boxes_exp[:, 2] = x_c + w_half
boxes_exp[:, 1] = y_c - h_half
boxes_exp[:, 3] = y_c + h_half
return boxes_exp
def flip_boxes(boxes, width):
"""Flip the boxes horizontally."""
boxes_flipped = boxes.copy()
boxes_flipped[:, 0] = width - boxes[:, 2] - 1
boxes_flipped[:, 2] = width - boxes[:, 0] - 1
return boxes_flipped
def filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min size."""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# ------------------------------------------------------------
"""Box utilities for normalized coordinates."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def boxes_area(boxes):
"""Compute the area of an array of boxes."""
w = (boxes[:, 2] - boxes[:, 0])
h = (boxes[:, 3] - boxes[:, 1])
area = w * h
assert np.all(area >= 0), 'Negative areas found'
return area
def intersection(boxes1, boxes2):
"""Compute pairwise intersection areas between boxes."""
[y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
[y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)
all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
inter_heights = np.maximum(
np.zeros(all_pairs_max_ymin.shape),
all_pairs_min_ymax - all_pairs_max_ymin
)
inter_widths = np.maximum(
np.zeros(all_pairs_max_xmin.shape),
all_pairs_min_xmax - all_pairs_max_xmin
)
return inter_heights * inter_widths
def ioa1(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections."""
inter = intersection(boxes1, boxes2)
area = np.expand_dims(boxes_area(boxes1), axis=1)
return inter / area
def ioa2(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections."""
inter = intersection(boxes1, boxes2)
area = np.expand_dims(boxes_area(boxes2), axis=0)
return inter / area
def iou(boxes1, boxes2):
"""Computes pairwise intersection-over-union between box collections."""
inter = intersection(boxes1, boxes2)
area1 = boxes_area(boxes1)
area2 = boxes_area(boxes2)
union = np.expand_dims(area1, axis=1) + \
np.expand_dims(area2, axis=0) - inter
return inter / union
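Note the coordinate order in this file: unlike the pixel-space utilities above (x1, y1, x2, y2), these normalized-box helpers split boxes as [y_min, x_min, y_max, x_max]. A small sanity check of `iou` (assuming it is in scope in this module):

```python
import numpy as np

b1 = np.array([[0.0, 0.0, 0.5, 0.5]])    # y_min, x_min, y_max, x_max
b2 = np.array([[0.25, 0.25, 0.75, 0.75]])
# inter = 0.25 * 0.25; union = 0.25 + 0.25 - 0.0625
print(iou(b1, b2))  # ~0.1429
```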
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import importlib.machinery
import os
import dragon
from dragon.core.util import six
from dragon.vm import torch
import numpy as np
from seetadet.core.config import cfg
def get_param_groups(module):
"""Separate parameters according to weight decay.
Parameters
----------
module : dragon.vm.torch.nn.Module
The module to collect parameters.
Returns
-------
Sequence[ParamGroup]
The parameter groups.
"""
param_groups = [
{'params': []}, # Decayed always
{'params': [], 'weight_decay': -1.}
]
for name, param in module.named_parameters():
gi = 0 if 'weight' in name and param.dim() > 1 else 1
param_groups[gi]['params'].append(param)
if len(param_groups[1]['params']) == 0:
param_groups.pop() # Remove empty group
return param_groups
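A usage sketch for `get_param_groups` (run inside this module): weights with dim > 1 go to the decayed group, while biases and 1-D parameters (e.g. norm scales) get `weight_decay = -1.`, which the Dragon optimizer is assumed here to treat as "no decay":

```python
import dragon.vm.torch as torch

module = torch.nn.Conv2d(3, 8, kernel_size=3, bias=True)
groups = get_param_groups(module)
print(len(groups[0]['params']), len(groups[1]['params']))  # 1 1
optimizer = torch.optim.SGD(groups, lr=0.01, momentum=0.9,
                            weight_decay=0.0001)
```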
def load_library(library_prefix):
"""Load a shared library.
Parameters
----------
library_prefix : str
The prefix of the library.
"""
loader_details = (
importlib.machinery.ExtensionFileLoader,
importlib.machinery.EXTENSION_SUFFIXES
)
library_prefix = os.path.abspath(library_prefix)
lib_dir, fullname = os.path.split(library_prefix)
finder = importlib.machinery.FileFinder(lib_dir, loader_details)
ext_specs = finder.find_spec(fullname)
if ext_specs is None:
raise ImportError(
'Could not find the pre-built library '
'for <%s>.' % library_prefix
)
dragon.load_library(ext_specs.origin)
def new_tensor(data, enforce_cpu=False):
"""Create a new tensor from the data.
Parameters
----------
data : array_like
The data value.
enforce_cpu : bool, optional, default=False
**True** to enforce the cpu storage.
Returns
-------
dragon.vm.torch.Tensor
The tensor holding the data.
"""
if data is None:
return data
if isinstance(data, np.ndarray):
tensor = torch.from_numpy(data)
elif isinstance(data, torch.Tensor):
tensor = data
else:
tensor = torch.tensor(data)
if not enforce_cpu:
tensor = tensor.cuda(cfg.GPU_ID)
return tensor
# Aliases
pickle = six.moves.pickle
...@@ -18,7 +18,7 @@ import numpy as np ...@@ -18,7 +18,7 @@ import numpy as np
import PIL.Image import PIL.Image
import PIL.ImageEnhance import PIL.ImageEnhance
from lib.core.config import cfg from seetadet.core.config import cfg
def distort_image(img): def distort_image(img):
...@@ -28,7 +28,6 @@ def distort_image(img): ...@@ -28,7 +28,6 @@ def distort_image(img):
PIL.ImageEnhance.Contrast, PIL.ImageEnhance.Contrast,
PIL.ImageEnhance.Color, PIL.ImageEnhance.Color,
] ]
np.random.shuffle(transforms)
for transform in transforms: for transform in transforms:
if np.random.uniform() < 0.5: if np.random.uniform() < 0.5:
img = transform(img) img = transform(img)
...@@ -62,7 +61,7 @@ def get_image_with_target_size(target_size, img): ...@@ -62,7 +61,7 @@ def get_image_with_target_size(target_size, img):
) )
def resize_image(img, fx, fy): def resize_image(img, fx=1, fy=1):
return cv2.resize( return cv2.resize(
img, img,
dsize=None, dsize=None,
...@@ -79,7 +78,6 @@ def scale_image(img): ...@@ -79,7 +78,6 @@ def scale_image(img):
im_size_max = np.max(img.shape[:2]) im_size_max = np.max(img.shape[:2])
for target_size in cfg.TEST.SCALES: for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_min) im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE
if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
processed_ims.append( processed_ims.append(
...@@ -91,17 +89,16 @@ def scale_image(img): ...@@ -91,17 +89,16 @@ def scale_image(img):
)) ))
ims_scales.append(im_scale) ims_scales.append(im_scale)
else: else:
# Scale image along the longest side # Scale image into a square
im_size_max = np.max(img.shape[:2])
for target_size in cfg.TEST.SCALES: for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_max) im_scale_h = float(target_size) / img.shape[0]
im_scale_w = float(target_size) / img.shape[1]
processed_ims.append( processed_ims.append(
cv2.resize( cv2.resize(
img, img,
dsize=None, dsize=(target_size, target_size),
fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR, interpolation=cv2.INTER_LINEAR,
)) ))
ims_scales.append(im_scale) ims_scales.append([im_scale_h, im_scale_w])
return processed_ims, ims_scales return processed_ims, ims_scales
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/platform/tf_logging.py> # <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/platform/tf_logging.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import inspect import inspect
import logging as _logging import logging as _logging
import os import os
import sys as _sys import sys as _sys
import threading import threading
_logger = None _logger = None
_is_root = True _is_root = True
_logger_lock = threading.Lock() _logger_lock = threading.Lock()
def get_logger(): def get_logger():
global _logger global _logger
# Use double-checked locking to avoid taking lock unnecessarily. # Use double-checked locking to avoid taking lock unnecessarily.
if _logger: if _logger:
return _logger return _logger
_logger_lock.acquire() _logger_lock.acquire()
try: try:
if _logger: if _logger:
return _logger return _logger
logger = _logging.getLogger('detectron') logger = _logging.getLogger('SeetaDet')
logger.setLevel('INFO') logger.setLevel('INFO')
logger.propagate = False logger.propagate = False
if True: if True:
# Determine whether we are in an interactive environment # Determine whether we are in an interactive environment
_interactive = False _interactive = False
try: try:
# This is only defined in interactive shells. # This is only defined in interactive shells.
if _sys.ps1: if _sys.ps1:
_interactive = True _interactive = True
except AttributeError: except AttributeError:
# Even now, we may be in an interactive shell with `python -i`. # Even now, we may be in an interactive shell with `python -i`.
_interactive = _sys.flags.interactive _interactive = _sys.flags.interactive
# If we are in an interactive environment (like Jupyter), set loglevel # If we are in an interactive environment (like Jupyter), set loglevel
# to INFO and pipe the output to stdout. # to INFO and pipe the output to stdout.
if _interactive: if _interactive:
logger.setLevel('INFO') logger.setLevel('INFO')
_logging_target = _sys.stdout _logging_target = _sys.stdout
else: else:
_logging_target = _sys.stderr _logging_target = _sys.stderr
# Add the output handler. # Add the output handler.
_handler = _logging.StreamHandler(_logging_target) _handler = _logging.StreamHandler(_logging_target)
_handler.setFormatter(_logging.Formatter('%(levelname)s %(message)s')) _handler.setFormatter(_logging.Formatter('%(levelname)s %(message)s'))
logger.addHandler(_handler) logger.addHandler(_handler)
_logger = logger _logger = logger
return _logger return _logger
finally: finally:
_logger_lock.release() _logger_lock.release()
def _detailed_msg(msg): def _detailed_msg(msg):
file, lineno = inspect.stack()[:3][2][1:3] file, lineno = inspect.stack()[:3][2][1:3]
return "{}:{}] {}".format(os.path.split(file)[-1], lineno, msg) return "{}:{}] {}".format(os.path.split(file)[-1], lineno, msg)
def log(level, msg, *args, **kwargs): def log(level, msg, *args, **kwargs):
get_logger().log(level, _detailed_msg(msg), *args, **kwargs) get_logger().log(level, _detailed_msg(msg), *args, **kwargs)
def debug(msg, *args, **kwargs): def debug(msg, *args, **kwargs):
if is_root(): if is_root():
get_logger().debug(_detailed_msg(msg), *args, **kwargs) get_logger().debug(_detailed_msg(msg), *args, **kwargs)
def error(msg, *args, **kwargs): def error(msg, *args, **kwargs):
get_logger().error(_detailed_msg(msg), *args, **kwargs) get_logger().error(_detailed_msg(msg), *args, **kwargs)
assert 0 assert 0
def fatal(msg, *args, **kwargs): def fatal(msg, *args, **kwargs):
get_logger().fatal(_detailed_msg(msg), *args, **kwargs) get_logger().fatal(_detailed_msg(msg), *args, **kwargs)
assert 0 assert 0
def info(msg, *args, **kwargs): def info(msg, *args, **kwargs):
if is_root(): if is_root():
get_logger().info(_detailed_msg(msg), *args, **kwargs) get_logger().info(_detailed_msg(msg), *args, **kwargs)
def warn(msg, *args, **kwargs): def warn(msg, *args, **kwargs):
if is_root(): if is_root():
get_logger().warn(_detailed_msg(msg), *args, **kwargs) get_logger().warn(_detailed_msg(msg), *args, **kwargs)
def warning(msg, *args, **kwargs): def warning(msg, *args, **kwargs):
if is_root(): if is_root():
get_logger().warning(_detailed_msg(msg), *args, **kwargs) get_logger().warning(_detailed_msg(msg), *args, **kwargs)
def get_verbosity(): def get_verbosity():
"""Return how much logging output will be produced.""" """Return how much logging output will be produced."""
return get_logger().getEffectiveLevel() return get_logger().getEffectiveLevel()
def set_verbosity(v): def set_verbosity(v):
"""Sets the threshold for what messages will be logged.""" """Sets the threshold for what messages will be logged."""
get_logger().setLevel(v) get_logger().setLevel(v)
def set_root_logger(is_root=True): def set_root_logger(is_root=True):
global _is_root global _is_root
_is_root = is_root _is_root = is_root
def is_root(): def is_root():
return _is_root return _is_root
...@@ -21,7 +21,7 @@ import cv2 ...@@ -21,7 +21,7 @@ import cv2
import numpy as np import numpy as np
import PIL.Image import PIL.Image
from lib.utils import boxes as box_util from seetadet.utils import boxes as box_util
def dismantle_masks(gt_boxes, gt_masks, num_images): def dismantle_masks(gt_boxes, gt_masks, num_images):
......
...@@ -17,44 +17,53 @@ from __future__ import absolute_import ...@@ -17,44 +17,53 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.core.config import cfg from seetadet.modules import det
from seetadet.utils import env
try: try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms from seetadet.utils.cython_nms import cpu_nms
except ImportError as e: from seetadet.utils.cython_nms import cpu_soft_nms
print('Failed to import cpu nms. Error: {0}'.format(str(e))) except ImportError:
cpu_nms = cpu_soft_nms = print
try:
from lib.nms.gpu_nms import gpu_nms def gpu_nms(dets, thresh):
except ImportError as e: """Filter out the detections using GPU-NMS."""
print('Failed to import gpu nms. Error: {0}'.format(str(e))) if dets.shape[0] == 0:
return []
scores = dets[:, 4]
order = scores.argsort()[::-1]
sorted_dets = env.new_tensor(dets[order, :])
keep = det.nms(sorted_dets, iou_threshold=thresh).numpy()
return order[keep]
def nms(detections, thresh, force_cpu=False): def nms(dets, thresh):
"""Perform either CPU or GPU Hard-NMS.""" """Filter out the detections using NMS."""
if detections.shape[0] == 0: if dets.shape[0] == 0:
return [] return []
if cfg.USE_GPU_NMS and not force_cpu: if cpu_nms is print:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID) raise ImportError('Failed to load <cython_nms> library.')
else: return cpu_nms(dets, thresh)
return cpu_nms(detections, thresh)
def soft_nms( def soft_nms(
detections, dets,
thresh, thresh,
method='linear', method='linear',
sigma=0.5, sigma=0.5,
score_thresh=0.001, score_thresh=0.001,
): ):
"""Perform CPU Soft-NMS.""" """Filter out the detections using Soft-NMS."""
if detections.shape[0] == 0: if dets.shape[0] == 0:
return [] return []
if cpu_soft_nms is print:
raise ImportError('Failed to load <cython_nms> library.')
methods = {'hard': 0, 'linear': 1, 'gaussian': 2} methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods: if method not in methods:
raise ValueError('Unknown soft nms method:', method) raise ValueError('Unknown soft nms method:', method)
return cpu_soft_nms( return cpu_soft_nms(
detections, dets,
thresh, thresh,
methods[method], methods[method],
sigma, sigma,
......
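A usage sketch for the NMS wrappers above (assuming the `cython_nms` extension was built; otherwise `nms`/`soft_nms` raise ImportError as shown):

```python
import numpy as np

dets = np.array([
    [0, 0, 10, 10, 0.9],
    [1, 1, 10, 10, 0.8],
    [20, 20, 30, 30, 0.7],
], dtype=np.float32)
keep = nms(dets, thresh=0.5)                   # hard suppression
keep_soft = soft_nms(dets, thresh=0.5,         # decays scores instead
                     method='gaussian', sigma=0.5)
```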
...@@ -30,8 +30,8 @@ import matplotlib.pyplot as plt ...@@ -30,8 +30,8 @@ import matplotlib.pyplot as plt
from matplotlib.patches import Polygon from matplotlib.patches import Polygon
import numpy as np import numpy as np
from lib.utils.colormap import colormap from seetadet.utils.colormap import colormap
from lib.utils.boxes import expand_boxes from seetadet.utils.boxes import expand_boxes
plt.rcParams['pdf.fonttype'] = 42 # For editing in Adobe Illustrator plt.rcParams['pdf.fonttype'] = 42 # For editing in Adobe Illustrator
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
import setuptools
import setuptools.command.install
import sys
import subprocess
def clean():
"""Remove the work directories."""
if os.path.exists('build'):
shutil.rmtree('build')
if os.path.exists('seeta_det.egg-info'):
shutil.rmtree('seeta_det.egg-info')
def configure():
"""Prepare the package files."""
# Compile cxx sources
py_exec = sys.executable
if subprocess.call(
'cd csrc/cxx && '
'{} setup.py build_ext -b ../ --no-python-abi-suffix=0 -j 4 &&'
'{} setup.py clean'.format(py_exec, py_exec), shell=True
) > 0:
raise RuntimeError('Failed to build the cxx sources.')
# Compile pyx sources
if subprocess.call(
'cd csrc/pyx && '
'{} setup.py build_ext -b ../ --cython-c-in-temp -j 4 &&'
'{} setup.py clean'.format(py_exec, py_exec), shell=True,
) > 0:
raise RuntimeError('Failed to build the pyx sources.')
# Copy the pre-built libraries
for root, _, files in os.walk('csrc/install'):
root = root[len('csrc/install/'):]
for file in files:
src = os.path.join(root, file)
dest = src.replace('lib', 'seetadet')
if os.path.exists(dest):
os.remove(dest)
shutil.copy(os.path.join('csrc/install', src), dest)
shutil.rmtree('csrc/install')
class install(setuptools.command.install.install):
"""Old-style command to prevent from installing egg."""
def run(self):
setuptools.command.install.install.run(self)
def find_packages():
    """Return the Python packages to be installed."""
    packages = []
    for root, _, files in os.walk('seetadet'):
        if os.path.exists(os.path.join(root, '__init__.py')):
            packages.append(root)
    return packages
def find_package_data():
    """Return the external data (compiled extensions) to be installed."""
    libraries = []
    for root, _, files in os.walk('seetadet'):
        root = root[len('seetadet/'):]
        for file in files:
            if file.endswith('.so') or file.endswith('.pyd'):
                libraries.append(os.path.join(root, file))
    return libraries
configure()
setuptools.setup(
    name='seeta-det',
    version='0.4.0',
    description='SeetaDet: A platform implementing popular object detection algorithms.',
    url='https://gitlab.seetatech.com/seetaresearch/SeetaDet',
    author='SeetaTech',
    license='BSD 2-Clause',
    packages=find_packages(),
    package_data={'seetadet': find_package_data()},
    package_dir={'seetadet': 'seetadet'},
    cmdclass={'install': install},
    install_requires=['opencv-python', 'Pillow'],
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'Intended Audience :: Education',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: BSD License',
        'Programming Language :: C++',
        'Programming Language :: Python',
        'Topic :: Scientific/Engineering',
        'Topic :: Scientific/Engineering :: Mathematics',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Topic :: Software Development',
        'Topic :: Software Development :: Libraries',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
)
clean()
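As a sanity check on the two helpers above, a hedged sketch of their results once `configure()` has copied the compiled extensions into the source tree (file names are illustrative; real ones may carry platform and ABI suffixes):

```python
# Hypothetical tree after configure():
#   seetadet/__init__.py
#   seetadet/utils/__init__.py
#   seetadet/utils/cython_nms.so   <- copied out of csrc/install
find_packages()      # -> ['seetadet', 'seetadet/utils']
find_package_data()  # -> ['utils/cython_nms.so'], paths relative to the package root
```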
@@ -21,10 +21,11 @@ import argparse
import dragon.vm.torch as torch
import pprint

from seetadet import onnx as _
from seetadet.core.config import cfg
from seetadet.core.coordinator import Coordinator
from seetadet.modeling.detector import new_detector
from seetadet.utils import logger


def parse_args():
@@ -71,8 +72,8 @@ if __name__ == '__main__':
                .format(coordinator.exports_dir()))
    detector = new_detector(cfg.GPU_ID, checkpoint)

    data = torch.zeros(*args.input_shape, dtype='uint8')
    ims_info = torch.zeros(args.input_shape[0], 3, dtype='float32')

    torch.onnx.export(
        model=detector,
...
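The export inputs now pass Dragon's dtype strings directly instead of chaining `.byte()` and `.float()` casts. A minimal sketch with an illustrative `--input_shape` of `1 3 512 512` (reading `ims_info` as per-image `(height, width, scale)` follows the detectron-style convention and is an assumption here):

```python
import dragon.vm.torch as torch

# dtype is given as a string in Dragon's torch-style API.
data = torch.zeros(1, 3, 512, 512, dtype='uint8')  # dummy image batch
ims_info = torch.zeros(1, 3, dtype='float32')      # per-image (height, width, scale)
```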
@@ -21,11 +21,11 @@ import argparse
import dragon
import numpy

from seetadet.core.config import cfg
from seetadet.core.coordinator import Coordinator
from seetadet.core.train import train_net
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger


def parse_args():
@@ -79,12 +79,12 @@ if __name__ == '__main__':
    # Fix the random seed for reproducibility
    numpy.random.seed(cfg.RNG_SEED)
    dragon.random.set_seed(cfg.RNG_SEED)

    # Inspect the dataset
    dataset = get_dataset(cfg.TRAIN.DATASET)
    logger.info('Dataset({}): {} images will be used to train.'
                .format(cfg.TRAIN.DATASET, dataset.num_images))

    # Ready to train the network
    logger.info('Output will be saved to `{:s}`'
...
@@ -20,12 +20,12 @@ sys.path.insert(0, '..')
import argparse
import pprint

from seetadet.core import test_engine
from seetadet.core.config import cfg
from seetadet.core.coordinator import Coordinator
from seetadet.core.test import TestServer
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger


def parse_args():
@@ -81,11 +81,11 @@ if __name__ == '__main__':
    if checkpoint is None:
        raise RuntimeError('The checkpoint of global step {} does not exist.'.format(args.iter))

    # Inspect the dataset
    dataset = get_dataset(cfg.TEST.DATASET)
    cfg.TEST.PROTOCOL = 'dump' if args.dump else cfg.TEST.PROTOCOL
    logger.info('Dataset({}): {} images will be used to test.'
                .format(cfg.TEST.DATASET, dataset.num_images))

    # Ready to test the network
    output_dir = coordinator.results_dir(checkpoint, args.output_dir)
...
@@ -20,8 +20,8 @@ sys.path.insert(0, '..')
import argparse
import numpy

from seetadet.core.coordinator import Coordinator
from seetadet.utils import logger


def parse_args():
...
@@ -22,11 +22,11 @@ import dragon
import numpy
import pprint

from seetadet.core.config import cfg
from seetadet.core.coordinator import Coordinator
from seetadet.core.train import train_net
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger


def parse_args():
@@ -59,7 +59,7 @@ def mpi_train(cfg_file, exp_dir):
    """
    import subprocess
    args = 'mpirun --allow-run-as-root -n {} --bind-to none '.format(cfg.NUM_GPUS)
    args += '{} {} '.format(sys.executable, 'mpi_train.py')
    args += '--cfg {} --exp_dir {} '.format(osp.abspath(cfg_file), exp_dir)
    return subprocess.call(args, shell=True)
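The new `--bind-to none` flag stops Open MPI from pinning each rank to a single core, which would otherwise throttle the multi-threaded data loaders. An illustrative expansion of the command string built above, assuming `cfg.NUM_GPUS == 4` and made-up paths:

```python
import subprocess

args = 'mpirun --allow-run-as-root -n 4 --bind-to none '
args += '/usr/bin/python mpi_train.py '
args += '--cfg /abs/path/model.yml --exp_dir experiments/example '
subprocess.call(args, shell=True)  # launches one training process per GPU
```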
@@ -84,12 +84,12 @@ if __name__ == '__main__':
    # Fix the random seed for reproducibility
    numpy.random.seed(cfg.RNG_SEED)
    dragon.random.set_seed(cfg.RNG_SEED)

    # Inspect the dataset
    dataset = get_dataset(cfg.TRAIN.DATASET)
    logger.info('Dataset({}): {} images will be used to train.'
                .format(cfg.TRAIN.DATASET, dataset.num_images))

    # Ready to train the network
    logger.info('Output will be saved to `{:s}`'
...