Commit f4ecc7c7 by Ting PAN

Change the code structure

1 parent d3ed62db
Showing with 1750 additions and 2619 deletions
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.4.0 (20200408)
Dragon Minimum Required (Version 0.3.0.dev20200408)
Changes:
Preview Features:
- Optimize the code structure.
- DALI support for SSD, RetinaNet, and Faster-RCNN.
- Use KPLRecord instead of SeetaRecord.
Bugs fixed:
- Fix the frozen Affine issue.
------------------------------------------------------------------------
SeetaDet 0.3.0 (20191121)
Dragon Minimum Required (Version 0.3.0.dev20191121)
......
...@@ -2,8 +2,8 @@
## WHAT's SeetaDet?
SeetaDet is a platform implementing popular object detection algorithms,
including R-CNN series, SSD, and RetinaNet.
We have achieved the same or higher performance than the baselines reported in the original papers.
...@@ -14,22 +14,33 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de
## Requirements
seeta-dragon >= 0.3.0.dev20200408
## Installation
#### Build From Source
If you prefer to develop modules as well as run experiments,
the following commands will build but not install to ***site-packages***:
```bash
cd SeetaDet && python setup.py build
```
#### Install From Source
Clone this repository to your local disk and install:
```bash
cd SeetaDet && python setup.py install
```
#### Install From Git
You can also install it from the remote repository:
```bash
pip install git+https://gitlab.seetatech.com/seetaresearch/SeetaDet.git@master
```
## Quick Start
...@@ -37,7 +48,7 @@ bash ./make.sh
#### Train a detection model
```bash
cd tools
python train.py --cfg <MODEL_YAML>
```
...@@ -46,20 +57,20 @@ We have provided the default YAML examples in ``SeetaDet/configs``.
#### Test a detection model
```bash
cd tools
python test.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR> --iter <ITERATION>
```
Or
```bash
cd tools
python test_all.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR>
```
#### Export a detection model to ONNX
```bash
cd tools
python export.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR> --iter <ITERATION>
```
......
PROJECT(gpu_nms)
CMAKE_MINIMUM_REQUIRED(VERSION 3.0.2)
# ---------------- User Config ----------------
# Set your python "interpreter" if necessary
# if not, a default interpreter will be used
# here, provide several examples:
# set(PYTHON_EXECUTABLE /usr/bin/python) # Linux & OSX, Builtin Python
# set(PYTHON_EXECUTABLE /X/anaconda/bin/python) # Linux & OSX, Anaconda
# set(PYTHON_EXECUTABLE X:/Anaconda/python) # Win, Anaconda
# Set CUDA compiling architecture
# Remove "compute_70/sm_70" if using CUDA 8.0
set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
-gencode arch=compute_35,code=sm_35
-gencode arch=compute_50,code=sm_50
-gencode arch=compute_60,code=sm_60
-gencode arch=compute_70,code=sm_70)
# ---------------- User Config ----------------
# ---[ Dependencies
include(${PROJECT_SOURCE_DIR}/cmake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/cmake/FindNumPy.cmake)
FIND_PACKAGE(CUDA REQUIRED)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
message(STATUS "C++11 support has been enabled by default.")
# ---[ Config types
set(CMAKE_BUILD_TYPE Release CACHE STRING "set build type to release")
set(CMAKE_CONFIGURATION_TYPES Release CACHE STRING "set build type to release" FORCE)
# ---[ Includes
set(INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
include_directories(${INCLUDE_DIR})
include_directories(${PROJECT_SOURCE_DIR}/src)
include_directories(${PYTHON_INCLUDE_DIRS})
include_directories(${NUMPY_INCLUDE_DIR})
include_directories(${CUDA_INCLUDE_DIRS})
# ---[ libs
link_directories(${PYTHON_LIBRARIES})
# ---[ Install
set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR} CACHE STRING "set install prefix" FORCE)
set(CMAKE_SHARED_LIBRARY_PREFIX "")
# ---[ Flags
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_ARCH}")
if(WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP /O2 /Oi /GL /Ot /Gy")
endif()
if(UNIX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -s -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s -w -fPIC -O3 -m64 -std=c++11")
endif()
# ---[ Files
set(HEADER_FILES gpu_nms.h)
set(SRC_FILES gpu_nms.cpp nms_kernel.cu)
# ---[ Add Target
CUDA_ADD_LIBRARY(${PROJECT_NAME} SHARED ${HEADER_FILES} ${SRC_FILES})
# ---[ Link Libs
TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${CUDA_LIBRARIES} ${CUDA_cublas_LIBRARY} ${CUDA_curand_LIBRARY})
if(WIN32)
TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${PYTHON_LIBRARIES})
endif()
# ---[ Install Target
set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME "gpu_nms")
install (TARGETS ${PROJECT_NAME} DESTINATION ${PROJECT_BINARY_DIR}/../install/lib/nms)
# - Find the NumPy libraries
# This module finds if NumPy is installed, and sets the following variables
# indicating where it is.
#
# TODO: Update to provide the libraries and paths for linking npymath lib.
#
# NUMPY_FOUND - was NumPy found
# NUMPY_VERSION - the version of NumPy found as a string
# NUMPY_VERSION_MAJOR - the major version number of NumPy
# NUMPY_VERSION_MINOR - the minor version number of NumPy
# NUMPY_VERSION_PATCH - the patch version number of NumPy
# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601
# NUMPY_INCLUDE_DIR - path to the NumPy include files
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
if(PYTHONINTERP_FOUND)
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import numpy as n; print(n.__version__); print(n.get_include());"
RESULT_VARIABLE __result
OUTPUT_VARIABLE __output
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(__result MATCHES 0)
string(REGEX REPLACE ";" "\\\\;" __values ${__output})
string(REGEX REPLACE "\r?\n" ";" __values ${__values})
list(GET __values 0 NUMPY_VERSION)
list(GET __values 1 NUMPY_INCLUDE_DIR)
string(REGEX MATCH "^([0-9])+\\.([0-9])+\\.([0-9])+" __ver_check "${NUMPY_VERSION}")
if(NOT "${__ver_check}" STREQUAL "")
set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1})
set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2})
set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3})
math(EXPR NUMPY_VERSION_DECIMAL
"(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR})
else()
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n")
endif()
endif()
else()
message("Can not find Python interpretator.")
message(FATAL_ERROR "Do you set PYTHON_EXECUTABLE correctly?")
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION
VERSION_VAR NUMPY_VERSION)
if(NUMPY_FOUND)
message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})")
endif()
\ No newline at end of file
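For reference, the probe that FindNumPy.cmake runs through ${PYTHON_EXECUTABLE} is equivalent to this small script; its two printed lines become NUMPY_VERSION and NUMPY_INCLUDE_DIR:
```python
import numpy as n

# First output line -> parsed into NUMPY_VERSION (and its MAJOR/MINOR/PATCH parts).
print(n.__version__)
# Second output line -> NUMPY_INCLUDE_DIR, with backslashes normalized to '/'.
print(n.get_include())
```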
# - Find python libraries
# This module finds the libraries corresponding to the Python interpreter
# FindPythonInterp provides.
# This code sets the following variables:
#
# PYTHONLIBS_FOUND - have the Python libs been found
# PYTHON_PREFIX - path to the Python installation
# PYTHON_LIBRARIES - path to the python library
# PYTHON_INCLUDE_DIRS - path to where Python.h is found
# PYTHON_MODULE_EXTENSION - lib extension, e.g. '.so' or '.pyd'
# PYTHON_MODULE_PREFIX - lib name prefix: usually an empty string
# PYTHON_SITE_PACKAGES - path to installation site-packages
# PYTHON_IS_DEBUG - whether the Python interpreter is a debug build
#
# Thanks to talljimbo for the patch adding the 'LDVERSION' config
# variable usage.
#=============================================================================
# Copyright 2001-2009 Kitware, Inc.
# Copyright 2012 Continuum Analytics, Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# * Neither the names of Kitware, Inc., the Insight Software Consortium,
# nor the names of their contributors may be used to endorse or promote
# products derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=============================================================================
# Checking for the extension makes sure that `LibsNew` was found and not just `Libs`.
if(PYTHONLIBS_FOUND AND PYTHON_MODULE_EXTENSION)
return()
endif()
# Use the Python interpreter to find the libs.
if(PythonLibsNew_FIND_REQUIRED)
find_package(PythonInterp ${PythonLibsNew_FIND_VERSION} REQUIRED)
else()
find_package(PythonInterp ${PythonLibsNew_FIND_VERSION})
endif()
if(NOT PYTHONINTERP_FOUND)
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter
# testing whether sys has the gettotalrefcount function is a reliable, cross-platform
# way to detect a CPython debug interpreter.
#
# The library suffix is from the config var LDVERSION sometimes, otherwise
# VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows.
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"from distutils import sysconfig as s;import sys;import struct;
print('.'.join(str(v) for v in sys.version_info));
print(sys.prefix);
print(s.get_python_inc(plat_specific=True));
print(s.get_python_lib(plat_specific=True));
print(s.get_config_var('SO'));
print(hasattr(sys, 'gettotalrefcount')+0);
print(struct.calcsize('@P'));
print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
print(s.get_config_var('LIBDIR') or '');
print(s.get_config_var('MULTIARCH') or '');
"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE _PYTHON_VALUES
ERROR_VARIABLE _PYTHON_ERROR_VALUE)
if(NOT _PYTHON_SUCCESS MATCHES 0)
if(PythonLibsNew_FIND_REQUIRED)
message(FATAL_ERROR
"Python config failure:\n${_PYTHON_ERROR_VALUE}")
endif()
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# Convert the process output into a list
string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST)
list(GET _PYTHON_VALUES 1 PYTHON_PREFIX)
list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR)
list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES)
list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG)
list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
list(GET _PYTHON_VALUES 8 PYTHON_LIBDIR)
list(GET _PYTHON_VALUES 9 PYTHON_MULTIARCH)
# Make sure the Python has the same pointer-size as the chosen compiler
# Skip if CMAKE_SIZEOF_VOID_P is not defined
if(CMAKE_SIZEOF_VOID_P AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}"))
if(PythonLibsNew_FIND_REQUIRED)
math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8")
math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8")
message(FATAL_ERROR
"Python config failure: Python is ${_PYTHON_BITS}-bit, "
"chosen compiler is ${_CMAKE_BITS}-bit")
endif()
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# The built-in FindPython didn't always give the version numbers
string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST ${_PYTHON_VERSION_LIST})
list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR)
list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR)
list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH)
# Make sure all directory separators are '/'
string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDE_DIR})
string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES})
if(CMAKE_HOST_WIN32)
set(PYTHON_LIBRARY
"${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
# when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the
# original python installation. They may be found relative to PYTHON_INCLUDE_DIR.
if(NOT EXISTS "${PYTHON_LIBRARY}")
get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY)
set(PYTHON_LIBRARY
"${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
endif()
# raise an error if the python libs are still not found.
if(NOT EXISTS "${PYTHON_LIBRARY}")
message(FATAL_ERROR "Python libraries not found")
endif()
else()
if(PYTHON_MULTIARCH)
set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}/${PYTHON_MULTIARCH}" "${PYTHON_LIBDIR}")
else()
set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}")
endif()
#message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}")
# Probably this needs to be more involved. It would be nice if the config
# information the python interpreter itself gave us were more complete.
find_library(PYTHON_LIBRARY
NAMES "python${PYTHON_LIBRARY_SUFFIX}"
PATHS ${_PYTHON_LIBS_SEARCH}
NO_DEFAULT_PATH)
# If all else fails, just set the name/version and let the linker figure out the path.
if(NOT PYTHON_LIBRARY)
set(PYTHON_LIBRARY python${PYTHON_LIBRARY_SUFFIX})
endif()
endif()
MARK_AS_ADVANCED(
PYTHON_LIBRARY
PYTHON_INCLUDE_DIR
)
# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the
# cache entries because they are meant to specify the location of a single
# library. We now set the variables listed by the documentation for this
# module.
SET(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
SET(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}")
find_package_message(PYTHON
"Found PythonLibs: ${PYTHON_LIBRARY}"
"${PYTHON_EXECUTABLE}${PYTHON_VERSION}")
set(PYTHONLIBS_FOUND TRUE)
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
int boxes_dim, float nms_overlap_thresh, int device_id);
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import numpy as np
cimport numpy as np
assert sizeof(int) == sizeof(np.int32_t)
cdef extern from "gpu_nms.h":
void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh, int device_id=0):
cdef int boxes_num = dets.shape[0]
cdef int boxes_dim = dets.shape[1]
cdef int num_out
cdef np.ndarray[np.int32_t, ndim=1] \
keep = np.zeros(boxes_num, dtype=np.int32)
cdef np.ndarray[np.float32_t, ndim=1] \
scores = dets[:, 4]
cdef np.ndarray[np.intp_t, ndim=1] \
order = scores.argsort()[::-1]
cdef np.ndarray[np.float32_t, ndim=2] \
sorted_dets = dets[order, :]
_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
keep = keep[:num_out]
return list(order[keep])
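A minimal usage sketch of the wrapper above. The import path follows the ``install.deprecated.gpu_nms`` module name declared in the build script later in this commit; adjust it to wherever the built extension actually lands on your system:
```python
import numpy as np
from install.deprecated.gpu_nms import gpu_nms  # path per setup.py; adjust as needed

# Boxes as (x1, y1, x2, y2, score) rows; float32 is required by the wrapper.
dets = np.array([
    [10., 10., 50., 50., 0.9],
    [12., 12., 52., 52., 0.8],      # heavily overlaps the first box
    [100., 100., 150., 150., 0.7],
], dtype=np.float32)

keep = gpu_nms(dets, thresh=0.5, device_id=0)
print(keep)  # indices into dets of the retained boxes, e.g. [0, 2]
```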
#!/bin/sh
# Delete cache
rm -r build install *.c *.cpp
# Compile cpp modules
python setup.py build_ext --inplace
# Compile cuda modules
cd build && cmake .. && make install && cd ..
# Copy to the library root
cp -r install/lib ../
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#include <cstdio>
#include <cstdlib>
#include <vector>
#include "gpu_nms.h"
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(error)); \
      exit(EXIT_FAILURE); \
    } \
  } while (0)
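// Switch the calling host thread to the requested device; the early
// return avoids a redundant cudaSetDevice when it is already current.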
void SetDevice(int device_id) {
int current_device;
CUDA_CHECK(cudaGetDevice(&current_device));
if (current_device == device_id) return;
CUDA_CHECK(cudaSetDevice(device_id));
}
#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define NMS_BLOCK_SIZE 64
template <typename T>
__device__ T iou(const T* A, const T* B) {
const T x1 = max(A[0], B[0]);
const T y1 = max(A[1], B[1]);
const T x2 = min(A[2], B[2]);
const T y2 = min(A[3], B[3]);
const T width = max((T)0, x2 - x1 + 1);
const T height = max((T)0, y2 - y1 + 1);
const T area = width * height;
const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1);
const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1);
return area / (A_area + B_area - area);
}
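// The "+ 1" terms follow the legacy integer-pixel convention: a box
// spanning columns x1..x2 inclusive covers (x2 - x1 + 1) pixels. For
// example, A = (0, 0, 9, 9) and B = (5, 0, 14, 9) each cover 100
// pixels and overlap on 50, so iou = 50 / (100 + 100 - 50) = 1/3.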
template <typename T>
__global__ void nms_mask(const int num_boxes, const T nms_thresh,
const T* boxes, unsigned long long* mask) {
const int i_start = blockIdx.x * NMS_BLOCK_SIZE;
const int di_end = min(num_boxes - i_start, NMS_BLOCK_SIZE);
const int j_start = blockIdx.y * NMS_BLOCK_SIZE;
const int dj_end = min(num_boxes - j_start, NMS_BLOCK_SIZE);
const int num_blocks = DIV_UP(num_boxes, NMS_BLOCK_SIZE);
const int bid = blockIdx.x;
const int tid = threadIdx.x;
__shared__ T boxes_i[NMS_BLOCK_SIZE * 4];
if (tid < di_end) {
boxes_i[tid * 4 + 0] = boxes[(i_start + tid) * 5 + 0];
boxes_i[tid * 4 + 1] = boxes[(i_start + tid) * 5 + 1];
boxes_i[tid * 4 + 2] = boxes[(i_start + tid) * 5 + 2];
boxes_i[tid * 4 + 3] = boxes[(i_start + tid) * 5 + 3];
}
__syncthreads();
if (tid < dj_end) {
const T* const box_j = boxes + (j_start + tid) * 5;
unsigned long long mask_j = 0;
const int di_start = (i_start == j_start) ? (tid + 1) : 0;
for (int di = di_start; di < di_end; ++di)
if (iou(box_j, boxes_i + di * 4) > nms_thresh)
mask_j |= 1ULL << di;
mask[(j_start + tid) * num_blocks + bid] = mask_j;
}
}
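// Each thread above owns one box j and packs its suppression relations
// into 64-bit words: bit di of mask[j * num_blocks + blockIdx.x] is set
// when box i of column block blockIdx.x overlaps j beyond nms_thresh.
// The host loop in ApplyNMS below ORs these words into a running "dead"
// bitmask while walking boxes in descending score order.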
template <typename T>
void ApplyNMS(const int num_boxes, const int max_keeps, const float thresh,
const T* boxes, int* keep_indices, int& num_keep) {
const int num_blocks = DIV_UP(num_boxes, NMS_BLOCK_SIZE);
const dim3 blocks(num_blocks, num_blocks);
size_t mask_nbytes = num_boxes * num_blocks * sizeof(unsigned long long);
size_t boxes_nbytes = num_boxes * 5 * sizeof(T);
void* boxes_dev, *mask_dev;
CUDA_CHECK(cudaMalloc(&boxes_dev, boxes_nbytes));
CUDA_CHECK(cudaMalloc(&mask_dev, mask_nbytes));
CUDA_CHECK(cudaMemcpy(boxes_dev, boxes, boxes_nbytes, cudaMemcpyHostToDevice));
nms_mask<T><<<blocks, NMS_BLOCK_SIZE>>>(
    num_boxes, thresh,
    (T*)boxes_dev,
    (unsigned long long*)mask_dev);
CUDA_CHECK(cudaPeekAtLastError());
std::vector<unsigned long long> mask_host(num_boxes * num_blocks);
CUDA_CHECK(cudaMemcpy(&mask_host[0], mask_dev, mask_nbytes, cudaMemcpyDeviceToHost));
std::vector<unsigned long long> dead_bit(num_blocks);
memset(&dead_bit[0], 0, sizeof(unsigned long long) * num_blocks);
int num_selected = 0;
for (int i = 0; i < num_boxes; ++i) {
const int nblock = i / NMS_BLOCK_SIZE;
const int inblock = i % NMS_BLOCK_SIZE;
if (!(dead_bit[nblock] & (1ULL << inblock))) {
keep_indices[num_selected++] = i;
unsigned long long* mask_i = &mask_host[0] + i * num_blocks;
for (int j = nblock; j < num_blocks; ++j) dead_bit[j] |= mask_i[j];
if (num_selected == max_keeps) break;
}
}
num_keep = num_selected;
CUDA_CHECK(cudaFree(mask_dev));
CUDA_CHECK(cudaFree(boxes_dev));
}
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
int boxes_dim, float nms_overlap_thresh, int device_id) {
// set the device to use
SetDevice(device_id);
// apply gpu nms
ApplyNMS<float>(boxes_num, boxes_num, nms_overlap_thresh,
boxes_host, keep_out, *num_out);
}
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from distutils.extension import Extension
from distutils.core import setup
from Cython.Distutils import build_ext
import numpy as np
numpy_include = np.get_include()
ext_modules = [
Extension(
"install.lib.utils.cython_bbox",
["bbox.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
include_dirs = [numpy_include]),
Extension(
"install.lib.nms.cpu_nms",
["cpu_nms.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
include_dirs = [numpy_include]),
Extension(
"install.deprecated.gpu_nms",
["gpu_nms.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
language='c++',
include_dirs = [numpy_include]),
Extension(
'install.lib.pycocotools._mask',
['../lib/pycocotools/maskApi.c', '../lib/pycocotools/_mask.pyx'],
include_dirs=[numpy_include, 'pycocotools'],
extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99']),
]
setup(name='Detectron', ext_modules=ext_modules, cmdclass={'build_ext': build_ext})
NUM_GPUS: 8
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: faster_rcnn
  BACKBONE: resnet101.fpn
  CLASSES: ['__background__',
            'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', 'stop sign', 'parking meter', 'bench',
            'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
            'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
            'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
            'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
            'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
            'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
            'teddy bear', 'hair drier', 'toothbrush']
  NUM_CLASSES: 81
SOLVER:
  BASE_LR: 0.02
  DECAY_STEPS: [60000, 80000]
  MAX_STEPS: 90000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: coco_faster_rcnn
FRCNN:
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
TRAIN:
  WEIGHTS: '/model/R-101.Affine.pth'
  DATASET: '/data/coco_2014_trainval35k'
  USE_DIFF: False  # Do not use crowd objects
  IMS_PER_BATCH: 2
  BATCH_SIZE: 512
  SCALES: [800]
  MAX_SIZE: 1333
TEST:
  DATASET: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  RPN_POST_NMS_TOP_N: 1000
  SCALES: [800]
  MAX_SIZE: 1333
  NMS: 0.5
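These configs are plain YAML consumed by tools/train.py via --cfg; a minimal sketch of inspecting one outside the framework (assuming PyYAML and a hypothetical file name under SeetaDet/configs):
```python
import yaml

# Hypothetical path; use any of the YAML files under SeetaDet/configs.
with open('configs/coco_faster_rcnn_90k.yml') as f:
    cfg = yaml.safe_load(f)

print(cfg['MODEL']['TYPE'])          # 'faster_rcnn'
print(cfg['SOLVER']['DECAY_STEPS'])  # [60000, 80000]
# 80 COCO classes + '__background__' must match NUM_CLASSES.
assert len(cfg['MODEL']['CLASSES']) == cfg['MODEL']['NUM_CLASSES'] == 81
```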
NUM_GPUS: 8
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: faster_rcnn
  BACKBONE: resnet101.fpn
  CLASSES: ['__background__',
            'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', 'stop sign', 'parking meter', 'bench',
            'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
            'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
            'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
            'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
            'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
            'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
            'teddy bear', 'hair drier', 'toothbrush']
  NUM_CLASSES: 81
SOLVER:
  BASE_LR: 0.02
  DECAY_STEPS: [120000, 160000]
  MAX_STEPS: 180000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: coco_faster_rcnn
FRCNN:
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
TRAIN:
  WEIGHTS: '/model/R-101.Affine.pth'
  DATASET: '/data/coco_2014_trainval35k'
  USE_DIFF: False  # Do not use crowd objects
  IMS_PER_BATCH: 2
  BATCH_SIZE: 512
  SCALES: [800]
  MAX_SIZE: 1333
TEST:
  DATASET: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  RPN_POST_NMS_TOP_N: 1000
  SCALES: [800]
  MAX_SIZE: 1333
  NMS: 0.5
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: faster_rcnn
  BACKBONE: resnet50.fpn
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.002
  DECAY_STEPS: [100000, 140000]
  MAX_STEPS: 140000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_faster_rcnn
FRCNN:
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 2
  BATCH_SIZE: 128
  SCALES: [600]
  MAX_SIZE: 1000
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  RPN_POST_NMS_TOP_N: 1000
  SCALES: [600]
  MAX_SIZE: 1000
  NMS: 0.45
\ No newline at end of file
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: faster_rcnn
  BACKBONE: vgg16.c4
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.001
  WEIGHT_DECAY: 0.0005
  DECAY_STEPS: [100000, 140000]
  MAX_STEPS: 140000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_faster_rcnn
RPN:
  STRIDES: [16]
  SCALES: [8, 16, 32]  # RField: [128, 256, 512]
  ASPECT_RATIOS: [0.5, 1.0, 2.0]
FRCNN:
  ROI_XFORM_METHOD: RoIPool
  ROI_XFORM_RESOLUTION: 7
  MLP_HEAD_DIM: 4096
TRAIN:
  WEIGHTS: '/model/VGG16.RCNN.pth'
  DATASET: '/data/voc_0712_trainval'
  RPN_MIN_SIZE: 16
  IMS_PER_BATCH: 2
  BATCH_SIZE: 128
  SCALES: [600]
  MAX_SIZE: 1000
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  RPN_MIN_SIZE: 16
  RPN_POST_NMS_TOP_N: 300
  SCALES: [600]
  MAX_SIZE: 1000
  NMS: 0.45
\ No newline at end of file
NUM_GPUS: 4
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: retinanet
  BACKBONE: resnet50.fpn
  CLASSES: ['__background__',
            'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', 'stop sign', 'parking meter', 'bench',
            'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
            'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
            'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
            'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
            'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
            'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
            'teddy bear', 'hair drier', 'toothbrush']
  NUM_CLASSES: 81
SOLVER:
  BASE_LR: 0.01
  DECAY_STEPS: [60000, 80000]
  MAX_STEPS: 90000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATASET: '/data/coco_2014_trainval35k'
  USE_DIFF: False  # Do not use crowd objects
  USE_COLOR_JITTER: True
  IMS_PER_BATCH: 8
  SCALES: [400]
  MAX_SIZE: 666
  RANDOM_SCALES: [0.75, 1.0]
TEST:
  DATASET: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  IMS_PER_BATCH: 1
  SCALES: [400]
  MAX_SIZE: 666
  NMS: 0.5
\ No newline at end of file
NUM_GPUS: 4
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: retinanet
  BACKBONE: resnet50.fpn
  CLASSES: ['__background__',
            'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', 'stop sign', 'parking meter', 'bench',
            'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
            'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
            'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
            'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
            'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
            'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
            'teddy bear', 'hair drier', 'toothbrush']
  NUM_CLASSES: 81
SOLVER:
  BASE_LR: 0.02
  WARM_UP_STEPS: 2000  # default: 500
  DECAY_STEPS: [120000, 160000]
  MAX_STEPS: 180000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
DROPBLOCK:
  DROP_ON: True
  DECREMENT: 0.000005  # * 20000 = 0.1
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATABASE: '/data/coco_2014_trainval35k'
  IMS_PER_BATCH: 8
  SCALES: [400]
  MAX_SIZE: 666
  USE_SCALE_JITTER: True
  USE_COLOR_JITTER: True
  SCALE_JITTER_RANGE: [0.75, 1.33]
TEST:
  DATABASE: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  IMS_PER_BATCH: 1
  SCALES: [400]
  MAX_SIZE: 666
  NMS: 0.5
\ No newline at end of file
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
  TYPE: retinanet
  BACKBONE: resnet18.fpn
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.01
  DECAY_STEPS: [40000, 50000, 60000]
  WARM_UP_STEPS: 2000
  MAX_STEPS: 60000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
  WEIGHTS: '/model/R-18.Affine.pth'
  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
  SCALE_JITTER_RANGE: [0.5, 2.0]
  USE_SCALE_JITTER: True
  USE_COLOR_JITTER: True
TEST:
  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [300]
  MAX_SIZE: 500
  NMS: 0.45
\ No newline at end of file
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
  TYPE: retinanet
  BACKBONE: airnet.fpn
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.01
  DECAY_STEPS: [40000, 50000, 60000]
  MAX_STEPS: 60000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_retinanet_320
FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
  WEIGHTS: '/model/AirNet.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
  USE_COLOR_JITTER: True
  IMS_PER_BATCH: 32
  SCALES: [320]
  RANDOM_SCALES: [0.5, 1.0]
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [320]
  NMS: 0.45
\ No newline at end of file
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
  TYPE: retinanet
  BACKBONE: resnet34.fpn
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.01
  DECAY_STEPS: [40000, 50000, 60000]
  WARM_UP_STEPS: 2000
  MAX_STEPS: 60000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_retinanet_320
FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
  USE_COLOR_JITTER: True
  IMS_PER_BATCH: 32
  SCALES: [320]
  RANDOM_SCALES: [0.5, 2.0]
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [320]
  NMS: 0.45
\ No newline at end of file
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: ssd
  BACKBONE: airnet5b.mbox
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.001
  DECAY_STEPS: [80000, 100000, 120000]
  MAX_STEPS: 120000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_ssd_320
SSD:
  NUM_CONVS: 2
  MULTIBOX:
    STRIDES: [8, 16, 32]
    MIN_SIZES: [30, 90, 150]
    MAX_SIZES: [90, 150, 210]
    ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
  WEIGHTS: '/model/AirNet.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
  SCALES: [320]
  RANDOM_SCALES: [0.25, 1.00]
  IMS_PER_BATCH: 32
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 8
  SCALES: [320]
  NMS_TOP_K: 400
  NMS: 0.45
  SCORE_THRESH: 0.01
  DETECTIONS_PER_IM: 200
\ No newline at end of file
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: ssd
  BACKBONE: vgg16_reduced_300.mbox
  FREEZE_AT: 0
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.001
  WEIGHT_DECAY: 0.0005
  DECAY_STEPS: [80000, 100000, 120000]
  MAX_STEPS: 120000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_ssd_300
SSD:
  MULTIBOX:
    STRIDES: [8, 16, 32, 64, 100, 300]
    MIN_SIZES: [30, 60, 110, 162, 213, 264]
    MAX_SIZES: [60, 110, 162, 213, 264, 315]
    ASPECT_RATIOS: [
      [1, 2, 0.5],
      [1, 2, 0.5, 3, 0.33],
      [1, 2, 0.5, 3, 0.33],
      [1, 2, 0.5, 3, 0.33],
      [1, 2, 0.5],
      [1, 2, 0.5]
    ]
TRAIN:
  WEIGHTS: '/model/VGG16.SSD.pth'
  DATASET: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
  SCALES: [300]
  RANDOM_SCALES: [0.25, 1.00]
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 8
  SCALES: [300]
  NMS_TOP_K: 400
  NMS: 0.45
  SCORE_THRESH: 0.01
  DETECTIONS_PER_IM: 200
...@@ -22,23 +22,29 @@ SOLVER:
  SNAPSHOT_PREFIX: voc_ssd_320
SSD:
  NUM_CONVS: 2
  MULTIBOX:
    STRIDES: [8, 16, 32, 64, 100, 300]
    MIN_SIZES: [30, 60, 110, 162, 213, 264]
    MAX_SIZES: [60, 110, 162, 213, 264, 315]
    ASPECT_RATIOS: [
      [1, 2, 0.5],
      [1, 2, 0.5, 3, 0.33],
      [1, 2, 0.5, 3, 0.33],
      [1, 2, 0.5, 3, 0.33],
      [1, 2, 0.5],
      [1, 2, 0.5]
    ]
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
  SCALES: [320]
  RANDOM_SCALES: [0.25, 1.00]
  IMS_PER_BATCH: 32
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 8
  SCALES: [320]
  NMS_TOP_K: 400
  NMS: 0.45
  SCORE_THRESH: 0.01
......
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "nms_op.h"
namespace dragon {
template <class Context> template <typename T>
void NonMaxSuppressionOp<Context>::DoRunWithType() {
int num_selected;
utils::detection::ApplyNMS(
Output(0)->count(),
Output(0)->count(),
iou_threshold_,
Input(0).template mutable_data<T, Context>(),
Output(0)->template mutable_data<int64_t, CPUContext>(),
num_selected, ctx()
);
Output(0)->Reshape({ num_selected });
}
template <class Context>
void NonMaxSuppressionOp<Context>::RunOnDevice() {
CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5)
<< "\nThe dimensions of boxes should be (num_boxes, 5).";
Output(0)->Reshape({ Input(0).dim(0) });
DispatchHelper<TensorTypes<float>>::Call(this, Input(0));
}
DEPLOY_CPU(NonMaxSuppression);
#ifdef USE_CUDA
DEPLOY_CUDA(NonMaxSuppression);
#endif
OPERATOR_SCHEMA(NonMaxSuppression).NumInputs(1).NumOutputs(1);
NO_GRADIENT(NonMaxSuppression);
} // namespace dragon
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef SEETADET_CXX_OPERATORS_NMS_OP_H_
#define SEETADET_CXX_OPERATORS_NMS_OP_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class NonMaxSuppressionOp final : public Operator<Context> {
public:
NonMaxSuppressionOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
iou_threshold_(OpArg<float>("iou_threshold", 0.5f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
protected:
float iou_threshold_;
};
} // namespace dragon
#endif // SEETADET_CXX_OPERATORS_NMS_OP_H_
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "retinanet_decoder_op.h"
namespace dragon {
template <class Context> template <typename T>
void RetinaNetDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
int feat_h, feat_w;
int C = Input(-3).dim(2), A, K;
int total_proposals = 0;
int num_candidates, num_boxes, num_proposals;
auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>();
auto* y = Output(0)->template mutable_data<BT, BC>();
for (int n = 0; n < num_images_; ++n) {
BT im_h = im_info[0];
BT im_w = im_info[1];
BT im_scale_h = im_info[2];
BT im_scale_w = im_info[2];
if (Input(-1).dim(1) == 4) im_scale_w = im_info[3];
auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0);
CHECK_EQ(strides_.size(), InputSize() - 3)
<< "\nGiven " << strides_.size() << " strides "
<< "and " << InputSize() - 3 << " features";
// Select the top-k candidates as proposals
num_boxes = Input(-3).dim(1);
num_candidates = Input(-3).count(1);
roi_indices_.resize(num_candidates);
num_candidates = 0;
for (int i = 0; i < roi_indices_.size(); ++i)
if (scores[i] > score_thr_)
roi_indices_[num_candidates++] = i;
scores_.resize(num_candidates);
for (int i = 0; i < num_candidates; ++i)
scores_[i] = scores[roi_indices_[i]];
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true,
scores_.data(),
indices_
);
for (int i = 0; i < num_proposals; ++i)
indices_[i] = roi_indices_[indices_[i]];
// Decode the candidates
int base_offset = 0;
for (int i = 0; i < strides_.size(); i++) {
feat_h = Input(i).dim(2);
feat_w = Input(i).dim(3);
K = feat_h * feat_w;
A = int(ratios_.size() * scales_.size());
anchors_.resize((size_t)(A * 4));
utils::detection::GenerateAnchors(
strides_[i],
(int)ratios_.size(),
(int)scales_.size(),
ratios_.data(),
scales_.data(),
anchors_.data()
);
utils::detection::GenerateGridAnchors(
num_proposals, C, A,
feat_h, feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
y
);
base_offset += (A * K);
}
utils::detection::GenerateMCProposals(
num_proposals,
num_boxes, C,
n,
im_h,
im_w,
im_scale_h,
im_scale_w,
scores,
deltas,
indices_.data(),
y
);
total_proposals += num_proposals;
y += (num_proposals * 7);
im_info += Input(-1).dim(1);
}
Output(0)->Reshape({ total_proposals, 7 });
}
template <class Context>
void RetinaNetDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_
<< " groups info, got "
<< Input(-1).dim(0) << ".";
Output(0)->Reshape({ num_images_ * pre_nms_topn_, 7 });
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
}
DEPLOY_CPU(RetinaNetDecoder);
#ifdef USE_CUDA
DEPLOY_CUDA(RetinaNetDecoder);
#endif
OPERATOR_SCHEMA(RetinaNetDecoder)
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
} // namespace dragon
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
#define SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class RetinaNetDecoderOp final : public Operator<Context> {
public:
RetinaNetDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
score_thr_(OpArg<float>("score_thresh", 0.05f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
protected:
float score_thr_;
vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_;
int64_t num_images_, pre_nms_topn_;
};
} // namespace dragon
#endif // SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "rpn_decoder_op.h"
namespace dragon {
template <class Context> template <typename T>
void RPNDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
int feat_h, feat_w, K, A;
int total_rois = 0, num_rois;
int num_candidates, num_proposals;
auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>();
auto* y = Output(0)->template mutable_data<BT, BC>();
for (int n = 0; n < num_images_; ++n) {
const BT im_h = im_info[0];
const BT im_w = im_info[1];
const BT scale = im_info[2];
const BT min_box_h = min_size_ * scale;
const BT min_box_w = min_size_ * scale;
auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0);
if (strides_.size() == 1) {
// Case 1: single stride
feat_h = Input(0).dim(2);
feat_w = Input(0).dim(3);
K = feat_h * feat_w;
A = int(ratios_.size() * scales_.size());
// Select the Top-K candidates as proposals
num_candidates = A * K;
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true, scores, indices_
);
// Decode the candidates
anchors_.resize((size_t)(A * 4));
proposals_.Reshape({ num_proposals, 5 });
utils::detection::GenerateAnchors(
strides_[0],
(int)ratios_.size(),
(int)scales_.size(),
ratios_.data(),
scales_.data(),
anchors_.data()
);
utils::detection::GenerateGridAnchors(
num_proposals, A,
feat_h, feat_w,
strides_[0],
0,
anchors_.data(),
indices_.data(),
proposals_.template mutable_data<BT, BC>()
);
utils::detection::GenerateSSProposals(
K, num_proposals,
im_h, im_w,
min_box_h, min_box_w,
scores,
deltas,
indices_.data(),
proposals_.template mutable_data<BT, BC>()
);
// Sort, NMS and Retrieve
utils::detection::SortProposals(
0,
num_proposals - 1,
num_proposals,
proposals_.template mutable_data<BT, BC>()
);
utils::detection::ApplyNMS(
num_proposals,
post_nms_topn_,
nms_thr_,
proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(),
num_rois, ctx()
);
utils::detection::RetrieveRoIs(
num_rois,
n,
proposals_.template data<BT, BC>(),
roi_indices_.data(),
y
);
} else if (strides_.size() > 1) {
// Case 2: multiple strides
CHECK_EQ(strides_.size(), InputSize() - 3)
<< "\nGiven " << strides_.size() << " strides "
<< "and " << InputSize() - 3 << " feature inputs";
CHECK_EQ(strides_.size(), scales_.size())
<< "\nGiven " << strides_.size() << " strides "
<< "and " << scales_.size() << " scales";
// Select the top-k candidates as proposals
num_candidates = Input(-3).dim(1);
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true, scores, indices_
);
// Decode the candidates
int base_offset = 0;
proposals_.Reshape({ num_proposals, 5 });
auto* proposals = proposals_
.template mutable_data<BT, BC>();
for (int i = 0; i < strides_.size(); i++) {
feat_h = Input(i).dim(2);
feat_w = Input(i).dim(3);
K = feat_h * feat_w;
A = (int)ratios_.size();
anchors_.resize((size_t)(A * 4));
utils::detection::GenerateAnchors(
strides_[i],
(int)ratios_.size(),
1,
ratios_.data(),
scales_.data(),
anchors_.data()
);
utils::detection::GenerateGridAnchors(
num_proposals, A,
feat_h, feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
proposals
);
base_offset += (A * K);
}
utils::detection::GenerateMSProposals(
num_candidates,
num_proposals,
im_h, im_w,
min_box_h, min_box_w,
scores,
deltas,
&indices_[0],
proposals
);
// Sort, NMS and Retrieve
utils::detection::SortProposals(
0,
num_proposals - 1,
num_proposals,
proposals
);
utils::detection::ApplyNMS(
num_proposals,
post_nms_topn_,
nms_thr_,
proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(),
num_rois, ctx()
);
utils::detection::RetrieveRoIs(
num_rois,
n,
proposals,
roi_indices_.data(),
y
);
} else {
LOG(FATAL) << "Expected at least one stride for proposals.";
}
total_rois += num_rois;
y += (num_rois * 5);
im_info += Input(-1).dim(1);
}
Output(0)->Reshape({ total_rois, 5 });
// Distribute rois into K bins
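// The binning below is assumed to follow the standard FPN assignment
// (a sketch of the rule, not a quote of CollectRoIs): an RoI of size
// w x h is routed to level
//   k = floor(canonical_level + log2(sqrt(w * h) / canonical_scale)),
// clamped to [min_level, max_level]. With the defaults (level 4,
// scale 224), a 112 x 112 RoI goes to level 3 and a 448 x 448 RoI
// goes to level 5.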
if (OutputSize() > 1) {
CHECK_EQ(max_level_ - min_level_ + 1, OutputSize())
<< "\nExcepted " << OutputSize() << " outputs for levels "
"between [" << min_level_ << ", " << max_level_ << "].";
vector<BT*> ys(OutputSize());
vector<vec64_t> bins(OutputSize());
Tensor RoIs; RoIs.ReshapeLike(*Output(0));
auto* rois = RoIs.template mutable_data<BT, BC>();
ctx()->template Copy<BT, BC, BC>(
Output(0)->count(),
rois, Output(0)->template data<BT, BC>()
);
utils::detection::CollectRoIs(
total_rois,
min_level_,
max_level_,
canonical_level_,
canonical_scale_,
rois, bins
);
for (int i = 0; i < OutputSize(); i++) {
Output(i)->Reshape({ std::max((int)bins[i].size(), 1), 5 });
ys[i] = Output(i)->template mutable_data<BT, BC>();
}
utils::detection::DistributeRoIs(bins, rois, ys);
}
}
template <class Context>
void RPNDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_
<< " groups info, got "
<< Input(-1).dim(0) << ".";
roi_indices_.resize(post_nms_topn_);
Output(0)->Reshape({ num_images_ * post_nms_topn_, 5 });
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
}
DEPLOY_CPU(RPNDecoder);
#ifdef USE_CUDA
DEPLOY_CUDA(RPNDecoder);
#endif
OPERATOR_SCHEMA(RPNDecoder)
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
} // namespace dragon
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
#define SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class RPNDecoderOp final : public Operator<Context> {
public:
RPNDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
post_nms_topn_(OpArg<int64_t>("post_nms_top_n", 300)),
nms_thr_(OpArg<float>("nms_thresh", 0.7f)),
min_size_(OpArg<int64_t>("min_size", 16)),
min_level_(OpArg<int64_t>("min_level", 2)),
max_level_(OpArg<int64_t>("max_level", 5)),
canonical_level_(OpArg<int64_t>("canonical_level", 4)),
canonical_scale_(OpArg<int64_t>("canonical_scale", 224)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
protected:
float nms_thr_;
vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_;
int64_t min_size_, pre_nms_topn_, post_nms_topn_;
int64_t num_images_, min_level_, max_level_;
int64_t canonical_level_, canonical_scale_;
Tensor proposals_;
};
} // namespace dragon
#endif // SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Build cxx sources."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from distutils.core import setup
from dragon.tools import cpp_extension
if cpp_extension.CUDA_HOME is not None and \
cpp_extension._cuda.is_available():
Extension = cpp_extension.CUDAExtension
else:
Extension = cpp_extension.CppExtension
ext_modules = [
Extension(
name='install.lib.modules._C',
sources=[
'utils/detection_utils.cc',
'utils/detection_utils.cu',
'operators/nms_op.cc',
'operators/retinanet_decoder_op.cc',
'operators/rpn_decoder_op.cc',
],
),
]
setup(
name='SeetaDet',
ext_modules=ext_modules,
cmdclass={'build_ext': cpp_extension.BuildExtension}
)
#include <dragon/core/context.h>
#include "detection_utils.h"
namespace dragon {
namespace utils {
namespace detection {
template <typename T>
T IoU(const T A[], const T B[]) {
if (A[0] > B[2] || A[1] > B[3] ||
A[2] < B[0] || A[3] < B[1]) return 0;
const T x1 = std::max(A[0], B[0]);
const T y1 = std::max(A[1], B[1]);
const T x2 = std::min(A[2], B[2]);
const T y2 = std::min(A[3], B[3]);
const T width = std::max((T)0, x2 - x1 + 1);
const T height = std::max((T)0, y2 - y1 + 1);
const T area = width * height;
const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1);
const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1);
return area / (A_area + B_area - area);
}
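// A worked example of the "+ 1" pixel convention above: A = [0, 0, 9, 9]
// and B = [5, 5, 14, 14] are both 10 x 10 boxes (area = 100 each); their
// intersection [5, 5, 9, 9] covers 5 * 5 = 25 pixels, so
// IoU = 25 / (100 + 100 - 25) = 25 / 175, roughly 0.143.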
template <> void ApplyNMS<float, CPUContext>(
const int num_boxes,
const int max_keeps,
const float thresh,
const float* boxes,
int64_t* keep_indices,
int& num_keep,
CPUContext* ctx) {
int count = 0;
std::vector<char> is_dead(num_boxes, 0);
for (int i = 0; i < num_boxes; ++i) {
if (is_dead[i]) continue;
keep_indices[count++] = i;
if (count == max_keeps) break;
for (int j = i + 1; j < num_boxes; ++j)
if (!is_dead[j] && IoU(&boxes[i * 5],
&boxes[j * 5]) > thresh)
is_dead[j] = 1;
}
num_keep = count;
}
} // namespace detection
} // namespace utils
} // namespace dragon
#ifdef USE_CUDA
#include <dragon/core/context_cuda.h>
#include "detection_utils.h"
namespace dragon {
namespace utils {
namespace detection {
#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define NUM_THREADS 64
namespace {
template <typename T>
__device__ bool _CheckIoU(
const T* a,
const T* b,
const float thresh) {
const T x1 = max(a[0], b[0]);
const T y1 = max(a[1], b[1]);
const T x2 = min(a[2], b[2]);
const T y2 = min(a[3], b[3]);
const T width = max(T(0), x2 - x1 + 1);
const T height = max(T(0), y2 - y1 + 1);
const T inter = width * height;
const T Sa = (a[2] - a[0] + T(1)) * (a[3] - a[1] + T(1));
const T Sb = (b[2] - b[0] + T(1)) * (b[3] - b[1] + T(1));
return inter > thresh * (Sa + Sb - inter);
}
template <typename T>
__global__ void _NonMaxSuppression(
const int num_blocks,
const int num_boxes,
const T thresh,
const T* dev_boxes,
uint64_t* dev_mask) {
const int row_start = blockIdx.y;
const int col_start = blockIdx.x;
if (row_start > col_start) return;
const int row_size = min(num_boxes - row_start * NUM_THREADS, NUM_THREADS);
const int col_size = min(num_boxes - col_start * NUM_THREADS, NUM_THREADS);
__shared__ T block_boxes[NUM_THREADS * 4];
if (threadIdx.x < col_size) {
const int c1 = threadIdx.x * 4;
const int c2 = (col_start * NUM_THREADS + threadIdx.x) * 5;
block_boxes[c1] = dev_boxes[c2];
block_boxes[c1 + 1] = dev_boxes[c2 + 1];
block_boxes[c1 + 2] = dev_boxes[c2 + 2];
block_boxes[c1 + 3] = dev_boxes[c2 + 3];
}
__syncthreads();
if (threadIdx.x < row_size) {
const int index = row_start * NUM_THREADS + threadIdx.x;
const T* dev_box = dev_boxes + index * 5;
unsigned long long val = 0;
const int start = (row_start == col_start) ? (threadIdx.x + 1) : 0;
for (int i = start; i < col_size; ++i) {
if (_CheckIoU(dev_box, block_boxes + i * 4, thresh)) {
val |= 1ULL << i;
}
}
dev_mask[index * num_blocks + col_start] = val;
}
}
} // namespace
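// How the kernel above is consumed: boxes are processed in 64-box groups,
// and each (row, col) block compares one group against another. Bit i of
// dev_mask[b * num_blocks + col] is set when box (col * 64 + i) overlaps
// box b beyond the threshold. The host-side pass below walks the boxes in
// descending score order (SortProposals runs upstream), keeps a box if no
// previously kept box has marked it dead, and ORs its mask row into
// dead_bit to suppress everything it overlaps.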
template <> void ApplyNMS<float, CUDAContext>(
const int num_boxes,
const int max_keeps,
const float thresh,
const float* boxes,
int64_t* keep_indices,
int& num_keep,
CUDAContext* ctx) {
const int num_blocks = DIV_UP(num_boxes, NUM_THREADS);
vector<uint64_t> mask_host(num_boxes * num_blocks);
auto* mask_dev = (uint64_t*)ctx->New(mask_host.size() * sizeof(uint64_t));
_NonMaxSuppression
<<< dim3(num_blocks, num_blocks), NUM_THREADS,
0, ctx->cuda_stream() >>>(
num_blocks,
num_boxes,
thresh,
boxes,
mask_dev
);
CUDA_CHECK(cudaMemcpyAsync(
mask_host.data(),
mask_dev,
mask_host.size() * sizeof(uint64_t),
cudaMemcpyDeviceToHost,
ctx->cuda_stream()
));
ctx->FinishDeviceComputation();
vector<uint64_t> dead_bit(num_blocks);
memset(&dead_bit[0], 0, sizeof(uint64_t) * num_blocks);
int num_selected = 0;
for (int i = 0; i < num_boxes; ++i) {
const int nblock = i / NUM_THREADS;
const int inblock = i % NUM_THREADS;
if (!(dead_bit[nblock] & (1ULL << inblock))) {
keep_indices[num_selected++] = i;
auto* mask_i = &mask_host[0] + i * num_blocks;
for (int j = nblock; j < num_blocks; ++j) dead_bit[j] |= mask_i[j];
if (num_selected == max_keeps) break;
}
}
num_keep = num_selected;
ctx->Delete(mask_dev);
}
} // namespace detection
} // namespace utils
} // namespace dragon
#endif // USE_CUDA
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------
cimport cython
import numpy as np
cimport numpy as np
DTYPE = np.float
ctypedef np.float_t DTYPE_t
@cython.boundscheck(False)
def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    with nogil:
        for k in range(K):
            box_area = (
                (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
                (query_boxes[k, 3] - query_boxes[k, 1] + 1)
            )
            for n in range(N):
                iw = (
                    min(boxes[n, 2], query_boxes[k, 2]) -
                    max(boxes[n, 0], query_boxes[k, 0]) + 1
                )
                if iw > 0:
                    ih = (
                        min(boxes[n, 3], query_boxes[k, 3]) -
                        max(boxes[n, 1], query_boxes[k, 1]) + 1
                    )
                    if ih > 0:
                        ua = float(
                            (boxes[n, 2] - boxes[n, 0] + 1) *
                            (boxes[n, 3] - boxes[n, 1] + 1) +
                            box_area - iw * ih
                        )
                        overlaps[n, k] = iw * ih / ua
    return overlaps
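# A quick numeric check (a sketch, not part of the module): with
# boxes = np.array([[0., 0., 9., 9.]]) and
# query_boxes = np.array([[5., 5., 14., 14.]]), both boxes cover
# 10 * 10 = 100 pixels under the "+ 1" convention, the intersection
# covers 5 * 5 = 25, and bbox_overlaps returns [[25 / 175]] ~ [[0.143]].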
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Compile the cython extensions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from distutils.extension import Extension
from distutils.core import setup
import os
from Cython.Distutils import build_ext
import numpy as np
ext_modules = [
Extension(
'install.lib.utils.cython_bbox',
['cython_bbox.pyx'],
extra_compile_args=['-w'],
include_dirs=[np.get_include()]
),
Extension(
'install.lib.utils.cython_nms',
['cython_nms.pyx'],
extra_compile_args=['-w'],
include_dirs=[np.get_include()]
),
Extension(
'install.lib.pycocotools._mask',
['maskApi.c', '_mask.pyx'],
include_dirs=[np.get_include(), os.path.dirname(os.path.abspath(__file__))],
extra_compile_args=['-w']
),
]
setup(
name='SeetaDet',
ext_modules=ext_modules,
cmdclass={'build_ext': build_ext},
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.data_transformer import DataTransformer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from lib.utils.blob import im_list_to_blob
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : callable
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
def produce(q_in):
processed_ims, ims_info, all_boxes = [], [], []
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[:2]) + [im_scale])
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
all_boxes.append(im_boxes)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, dtype=np.float32),
'gt_boxes': np.concatenate(all_boxes, axis=0),
}
# Two queues are used to implement aspect-grouping,
# which avoids mixing landscape and portrait images
# in one batch and padding them into a huge square
# blob, wasting GPU memory.
q1, q2 = self.Q21, self.Q22
# Main prefetch loop
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
q1, q2 = q2, q1 # Uniform sampling trick
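# Why aspect-grouping helps (an illustrative example): if a 600 x 1000
# landscape image and a 1000 x 600 portrait image landed in the same
# batch, im_list_to_blob would have to pad both into a 1000 x 1000 blob.
# Keeping the two orientations in separate queues (Q21 / Q22) lets each
# batch stay close to its native shape.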
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.mask_rcnn.data_transformer import DataTransformer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import mask_list_to_blob
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : callable
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
def produce(q_in):
processed_ims, ims_info = [], []
packed_boxes, packed_masks = [], []
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes, gt_masks = q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[:2]) + [im_scale])
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
packed_boxes.append(im_boxes)
packed_masks.append(gt_masks)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, 'float32'),
'gt_boxes': np.concatenate(packed_boxes, 0),
'gt_masks': mask_list_to_blob(packed_masks),
}
# Two queues are used to implement aspect-grouping,
# which avoids mixing landscape and portrait images
# in one batch and padding them into a huge square
# blob, wasting GPU memory.
q1, q2 = self.Q21, self.Q22
# Main prefetch loop
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
q1, q2 = q2, q1 # Uniform sampling trick
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import importlib
_STORE = collections.defaultdict(dict)
###########################################
# #
# Body #
# #
###########################################
# ResNet
for D in [18, 34, 50, 101, 152, 200, 269]:
_STORE['BODY']['resnet{}'.format(D)] = \
'lib.modeling.resnet.make_resnet_{}'.format(D)
# VGG
for D in [16, 19]:
for T in ['', '_reduced_300', '_reduced_512']:
_STORE['BODY']['vgg{}{}'.format(D, T)] = \
'lib.modeling.vgg.make_vgg_{}{}'.format(D, T)
# AirNet
for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D)
# MobileNet
for D in ['a1', 'v2']:
_STORE['BODY']['mobilenet_{}'.format(D)] = \
'lib.modeling.mobilenet.make_mobilenet_{}'.format(D)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'
.format(name, desc, ', '.join(sets.keys()))
)
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
except ImportError:
raise ValueError('Cannot import module: ' + module_name)
def get_body_func(name):
return get_template_func(
name, _STORE['BODY'], 'Body')
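# For example, get_body_func('resnet50') resolves the registered path
# 'lib.modeling.resnet.make_resnet_50' and returns that factory function;
# an unknown name raises a ValueError listing the registered keys.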
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch import nn
from lib.core.config import cfg
class Affine(object):
"""Affine transformation with weight and bias fixed."""
def __new__(cls, dim_in, bias=True, inplace=True):
return nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
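# Note (a general remark, not specific to this codebase): such a frozen
# Affine is commonly used in place of BatchNorm2d when fine-tuning a
# backbone whose batch statistics were folded into per-channel weight
# and bias, i.e. it computes y = weight * x + bias with both fixed.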
class Conv1x1(object):
"""1x1 convolution."""
def __new__(cls, dim_in, dim_out, stride=1, bias=False):
return nn.Conv2d(
dim_in,
dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
class Conv3x3(object):
"""3x3 convolution."""
def __new__(cls, dim_in, dim_out, stride=1, dilation=1, bias=False):
return nn.Conv2d(
dim_in,
dim_out,
kernel_size=3,
stride=stride,
padding=1 * dilation,
bias=bias,
)
class CrossEntropyLoss(object):
"""Cross entropy loss."""
def __new__(cls):
return nn.CrossEntropyLoss(ignore_index=-1)
class Identity(nn.Module):
"""Pass input to the output."""
def __init__(self, *args, **kwargs):
super(Identity, self).__init__()
_, _ = args, kwargs
def forward(self, x):
return x
class SigmoidFocalLoss(object):
"""Sigmoid focal loss."""
def __new__(cls):
return nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
)
class SmoothL1Loss(object):
"""Smoothed l1 loss."""
def __new__(cls, beta=1.):
return nn.SmoothL1Loss(
beta=beta,
reduction='batch_size',
)
def is_conv2d(module):
"""Return a bool indicating the module is a Conv2d."""
return isinstance(module, nn.Conv2d) or \
isinstance(module, nn.DepthwiseConv2d)
AvgPool2d = nn.AvgPool2d
BatchNorm2d = nn.BatchNorm2d
BCEWithLogitsLoss = nn.BCEWithLogitsLoss
Conv2d = nn.Conv2d
ConvTranspose2d = nn.ConvTranspose2d
DepthwiseConv2d = nn.DepthwiseConv2d
Linear = nn.Linear
MaxPool2d = nn.MaxPool2d
Module = nn.Module
ModuleList = nn.ModuleList
Sequential = nn.Sequential
ReLU = nn.ReLU
Sigmoid = nn.Sigmoid
Softmax = nn.Softmax
Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies,
either expressed or implied, of the FreeBSD Project.
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.ssd.data_transformer import DataTransformer
from lib.utils import logger
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : callable
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 32)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q2 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in, transformer.q_out = self.Q1, self.Q2
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
image_batch_shape = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
# Main prefetch loop
while True:
boxes_to_pack = []
img, gt_boxes = self.Q2.get()
ims_blob = np.zeros(image_batch_shape, img.dtype)
for i in range(cfg.TRAIN.IMS_PER_BATCH):
ims_blob[i] = img
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
boxes[:, :gt_boxes.shape[1]], boxes[:, -1] = gt_boxes, i
boxes_to_pack.append(boxes)
if i != (cfg.TRAIN.IMS_PER_BATCH - 1):
img, gt_boxes = self.Q2.get()
self.Q3.put({
'data': ims_blob,
'gt_boxes': np.concatenate(boxes_to_pack),
})
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/ppwwyyxx/tensorpack/blob/master/examples/FasterRCNN/utils/np_box_ops.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from lib.utils import cython_bbox
def intersection(boxes1, boxes2):
"""Compute pairwise intersection areas between boxes.
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes
boxes2: a numpy array with shape [M, 4] holding M boxes
Returns:
a numpy array with shape [N, M] representing pairwise intersection
areas (note: computed without the "+ 1" pixel offset that boxes_area
and bbox_overlaps in this file use)
"""
[y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
[y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)
all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
intersect_heights = np.maximum(
np.zeros(all_pairs_max_ymin.shape),
all_pairs_min_ymax - all_pairs_max_ymin)
all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
intersect_widths = np.maximum(
np.zeros(all_pairs_max_xmin.shape),
all_pairs_min_xmax - all_pairs_max_xmin)
return intersect_heights * intersect_widths
def iou(boxes1, boxes2):
"""Computes pairwise intersection-over-union between box collections.
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise iou scores.
"""
intersect = intersection(boxes1, boxes2)
area1 = boxes_area(boxes1)
area2 = boxes_area(boxes2)
union = \
np.expand_dims(area1, axis=1) + \
np.expand_dims(area2, axis=0) - intersect
return intersect / union
def ioa1(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections.
Intersection-over-area (ioa) between two boxes box1 and box2 is defined
here as their intersection area over box1's area (the denominator is
computed from boxes1). Note that ioa is not symmetric, that is,
IOA(box1, box2) != IOA(box2, box1).
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(boxes_area(boxes1), axis=1)
return intersect / areas
def ioa2(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections.
Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
their intersection area over box2's area. Note that ioa is not symmetric,
that is, IOA(box1, box2) != IOA(box2, box1).
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(boxes_area(boxes2), axis=0)
return intersect / areas
def bbox_overlaps(boxes1, boxes2):
"""Compute the overlaps between two group of boxes."""
return cython_bbox.bbox_overlaps(
np.ascontiguousarray(boxes1, dtype=np.float),
np.ascontiguousarray(boxes2, dtype=np.float),
)
def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1.)):
"""Transform the boxes to the regression targets."""
ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.
ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.
ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.
gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.
gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
wx, wy, ww, wh = weights
targets = [wx * (gt_ctr_x - ex_ctr_x) / ex_widths]
targets += [wy * (gt_ctr_y - ex_ctr_y) / ex_heights]
targets += [ww * np.log(gt_widths / ex_widths)]
targets += [wh * np.log(gt_heights / ex_heights)]
return np.vstack(targets).transpose()
def bbox_transform_inv(boxes, deltas, weights=(1., 1., 1., 1.)):
"""Decode the final boxes according to the deltas."""
if boxes.shape[0] == 0:
return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
boxes = boxes.astype(deltas.dtype, copy=False)
widths = boxes[:, 2] - boxes[:, 0] + 1.
heights = boxes[:, 3] - boxes[:, 1] + 1.
ctr_x = boxes[:, 0] + 0.5 * widths
ctr_y = boxes[:, 1] + 0.5 * heights
wx, wy, ww, wh = weights
dx = deltas[:, 0::4] / wx
dy = deltas[:, 1::4] / wy
dw = deltas[:, 2::4] / ww
dh = deltas[:, 3::4] / wh
pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
pred_w = np.exp(dw) * widths[:, np.newaxis]
pred_h = np.exp(dh) * heights[:, np.newaxis]
pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # x1
pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # y1
pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w # x2
pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h # y2
return pred_boxes
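# A worked round trip (a sketch): for ex_rois = [[0., 0., 9., 9.]] and
# gt_rois = [[5., 5., 14., 14.]], both boxes are 10 x 10 with centers
# (5, 5) and (10, 10), so bbox_transform returns [[0.5, 0.5, 0., 0.]].
# Feeding those deltas back through bbox_transform_inv yields
# [[5., 5., 15., 15.]]: the far corner lands one pixel high because the
# decoder omits the "- 1" of the "+ 1" width convention, matching the
# original py-faster-rcnn implementation.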
def boxes_area(boxes):
"""Compute the area of an array of boxes."""
w = (boxes[:, 2] - boxes[:, 0] + 1)
h = (boxes[:, 3] - boxes[:, 1] + 1)
areas = w * h
assert np.all(areas >= 0), 'Negative areas found'
return areas
def clip_boxes(boxes, im_shape):
# x1 >= 0
boxes[:, 0] = np.maximum(np.minimum(boxes[:, 0], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1] = np.maximum(np.minimum(boxes[:, 1], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2] = np.maximum(np.minimum(boxes[:, 2], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3] = np.maximum(np.minimum(boxes[:, 3], im_shape[0] - 1), 0)
return boxes
def clip_tiled_boxes(boxes, im_shape):
# x1 >= 0
boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
return boxes
def expand_boxes(boxes, scale):
"""Expand an array of boxes by a given scale."""
w_half = (boxes[:, 2] - boxes[:, 0]) * .5
h_half = (boxes[:, 3] - boxes[:, 1]) * .5
x_c = (boxes[:, 2] + boxes[:, 0]) * .5
y_c = (boxes[:, 3] + boxes[:, 1]) * .5
w_half *= scale
h_half *= scale
boxes_exp = np.zeros(boxes.shape)
boxes_exp[:, 0] = x_c - w_half
boxes_exp[:, 2] = x_c + w_half
boxes_exp[:, 1] = y_c - h_half
boxes_exp[:, 3] = y_c + h_half
return boxes_exp
def flip_boxes(boxes, width):
"""Flip the boxes horizontally."""
boxes_flipped = boxes.copy()
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
boxes_flipped[:, 0] = width - old_x2 - 1
boxes_flipped[:, 2] = width - old_x1 - 1
return boxes_flipped
def filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min size."""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
def dismantle_boxes(gt_boxes, num_images):
"""Dismantle the packed ground-truth boxes."""
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == i)[0]
][:, :-1] for i in range(num_images)
]
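# For example: with num_images=2 and rows packed as
# [x1, y1, x2, y2, class, image_index] (the layout produced in
# DataBatch.run()), rows whose last column equals 0 are returned, minus
# that column, as the boxes of image 0, and rows with index 1 as the
# boxes of image 1.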
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon
from dragon.core.framework import tensor_util
from dragon.core.util import six
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
def feed_tensor(tensor, array):
tensor_util.set_array(tensor, array)
def get_param_groups(module, bias_lr=1., bias_decay=0.):
"""Separate weight and bias into parameters groups.
Parameters
----------
module : dragon.vm.torch.nn.Module
The module to collect parameters.
bias_lr : float, optional, default=1.
The lr multiplier of bias.
bias_decay : float, optional, default=0.
The decay multiplier of bias.
Returns
-------
Sequence[ParamGroup]
The parameter groups.
"""
param_groups = [
{
'params': [],
'lr_mult': 1.,
'decay_mult': 1.,
},
{
'params': [],
'lr_mult': bias_lr,
'decay_mult': bias_decay,
}
]
for name, param in module.named_parameters():
gi = 0 if 'weight' in name and param.dim() > 1 else 1
param_groups[gi]['params'].append(param)
if len(param_groups[1]['params']) == 0:
param_groups.pop() # Remove empty group
return param_groups
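# A usage sketch (the optimizer and its arguments are assumptions; any
# dragon.vm.torch optimizer that accepts parameter groups should work):
#   groups = get_param_groups(model, bias_lr=2., bias_decay=0.)
#   optimizer = torch.optim.SGD(groups, lr=0.02, momentum=0.9)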
def get_workspace():
"""Return the current default workspace.
Returns
-------
dragon.Workspace
The default workspace.
"""
return dragon.get_workspace()
def new_placeholder(device=None):
"""Create a new tensor to feed data.
Parameters
----------
device : int, optional
The device index.
Returns
-------
dragon.vm.torch.Tensor
The placeholder tensor.
"""
value = torch.zeros(1)
if device is not None:
return value.cuda(device)
return value
def new_tensor(data, enforce_cpu=False):
"""Create a new tensor from the data.
Parameters
----------
data : array_like
The data value.
enforce_cpu : bool, optional, default=False
**True** to enforce the cpu storage.
Returns
-------
dragon.vm.torch.Tensor
The tensor taken with the data.
"""
if isinstance(data, np.ndarray):
tensor = torch.from_numpy(data)
elif isinstance(data, torch.Tensor):
tensor = data
else:
tensor = torch.tensor(data)
if not enforce_cpu:
tensor = tensor.cuda(cfg.GPU_ID)
return tensor
def new_workspace(merge_default=True):
"""Create a new workspace.
Parameters
----------
merge_default : bool, optional, default=True
**True** to merge tensors from default workspace.
Returns
-------
dragon.Workspace
The new workspace.
"""
workspace = dragon.Workspace()
if merge_default:
workspace.merge_from(get_workspace())
return workspace
def reset_workspace(workspace=None, merge_default=True):
"""Reset a workspace and return a new one.
Parameters
----------
workspace : dragon.Workspace, optional
The workspace to reset.
merge_default : bool, optional, default=True
**True** to merge tensors from default workspace.
Returns
-------
dragon.Workspace
The new workspace.
"""
if workspace is not None:
workspace.Clear() # Block the GIL
return new_workspace(merge_default)
class Graph(object):
"""Simple sequential graph to accelerate inference.
Graph removes the per-call overhead of python functions
under eager execution. That overhead is at least 15ms
for common backbones, which limits inference to about 60FPS.
For more details, see the eager mechanism of Dragon.
"""
def __init__(self, inputs, outputs, constants=None):
def canonicalize(input_dict):
if input_dict is None:
return {}
for k, v in input_dict.items():
input_dict[k] = v.name if hasattr(v, 'name') else v
return input_dict
self.placeholders = {}
self._inputs = canonicalize(inputs)
self._outputs = canonicalize(outputs)
self._constants = canonicalize(constants)
self._workspace = get_workspace()
self._tracer = torch.jit.get_tracer()
@property
def workspace(self):
return self._workspace
@workspace.setter
def workspace(self, value):
self._workspace = value
def forward(self, **kwargs):
# Assign inputs
for name, tensor in self._inputs.items():
value = kwargs.get(name, None)
tensor_util.set_array(tensor, value)
# Replay the traced expressions
self._tracer.replay()
# Collect outputs
# 1) Target results
# 2) Constant values
outputs = collections.OrderedDict()
for name, tensor in self._outputs.items():
outputs[name] = tensor_util.to_array(tensor, True)
for name, value in self._constants.items():
outputs[name] = value
return outputs
def __call__(self, **kwargs):
with self._workspace.as_default():
return self.forward(**kwargs)
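# A minimal usage sketch (assuming `image` and `detections` are
# dragon.vm.torch tensors produced under the tracer, so that replaying
# the traced expressions recomputes `detections` from `image`):
#   graph = Graph(inputs={'data': image}, outputs={'dets': detections})
#   outputs = graph(data=np.zeros((1, 3, 600, 800), 'float32'))
#   dets = outputs['dets']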
# Aliases
pickle = six.moves.pickle
@@ -11,6 +11,10 @@
 """Make record file for COCO dataset."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import os
 import shutil
@@ -37,8 +41,8 @@ if __name__ == '__main__':
         record_file=os.path.join(COCO_ROOT, 'coco_2014_trainval35k'),
         images_path=[os.path.join(COCO_ROOT, 'images/train2014'),
                      os.path.join(COCO_ROOT, 'images/val2014')],
-        splits_path=[os.path.join(COCO_ROOT, 'ImageSets'),
-                     os.path.join(COCO_ROOT, 'ImageSets')],
+        splits_path=[os.path.join(COCO_ROOT, 'splits'),
+                     os.path.join(COCO_ROOT, 'splits')],
         mask_file='build/coco_2014_trainval35k_mask.pkl',
         splits=['train', 'valminusminival'],
     )
@@ -48,7 +52,7 @@ if __name__ == '__main__':
         record_file=os.path.join(COCO_ROOT, 'coco_2014_minival'),
         images_path=os.path.join(COCO_ROOT, 'images/val2014'),
         mask_file='build/coco_2014_minival_mask.pkl',
-        splits_path=os.path.join(COCO_ROOT, 'ImageSets'),
+        splits_path=os.path.join(COCO_ROOT, 'splits'),
         splits=['minival'],
     )
...
@@ -86,7 +86,7 @@ def make_record(
     print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
-    writer = dragon.io.SeetaRecordWriter(
+    writer = dragon.io.KPLRecordWriter(
         path=record_file,
         protocol={
             'id': 'string',
@@ -133,6 +133,6 @@ def make_record(
     writer.close()
     end_time = time.time()
-    data_size = os.path.getsize(record_file + '/data.data') * 1e-6
+    data_size = os.path.getsize(record_file + '/root.data') * 1e-6
     print('{} images take {:.2f} MB in {:.2f} sec.'
           .format(total_line, data_size, end_time - start_time))
@@ -20,11 +20,11 @@ except:
     import pickle as cPickle
 sys.path.insert(0, '../..')
-from lib.pycocotools.coco import COCO
-from lib.pycocotools import mask_utils
+from seetadet.pycocotools.coco import COCO
+from seetadet.pycocotools import mask_utils
-class imdb(object):
+class COCOWrapper(object):
     def __init__(self, image_set, year, data_dir):
         self._year = year
         self._image_set = image_set
@@ -120,8 +120,6 @@ class imdb(object):
             # running out of the image bound
             # Do not use them or decoding error is inevitable
             mask_bytes = mask_utils.poly2bytes(obj['segmentation'], height, width)
-            if not isinstance(mask_bytes, bytes):
-                print(type(mask_bytes))
             if obj['area'] > 0 and x2 > x1 and y2 > y1:
                 obj['clean_bbox'] = [x1, y1, x2, y2]
                 valid_objects.append({
@@ -146,10 +144,11 @@ class imdb(object):
 def make_mask(split, year, data_dir):
-    coco = imdb(split, year, data_dir)
-    print('Preparing to make split: {}, total {} images'.format(split, coco.num_images))
-    if not osp.exists(osp.join(coco._data_path, 'ImageSets')):
-        os.makedirs(osp.join(coco._data_path, 'ImageSets'))
+    coco = COCOWrapper(split, year, data_dir)
+    print('Preparing to make split: {}, total {} images'
+          .format(split, coco.num_images))
+    if not osp.exists(osp.join(coco._data_path, 'splits')):
+        os.makedirs(osp.join(coco._data_path, 'splits'))
     gt_recs = OrderedDict()
     for i in range(coco.num_images):
@@ -157,14 +156,14 @@ def make_mask(split, year, data_dir):
         h, w, objects = coco.annotation_at(i)
         gt_recs[filename] = objects
-    with open(osp.join('build',
-            'coco_' + year + '_' + split + '_mask.pkl'), 'wb') as f:
-        cPickle.dump(gt_recs, f, cPickle.HIGHEST_PROTOCOL)
+    with open(osp.join('build', 'coco_' + year + '_' + split + '_mask.pkl'), 'wb') as f:
+        cPickle.dump(gt_recs, f, cPickle.HIGHEST_PROTOCOL)
-    with open(osp.join(coco._data_path, 'ImageSets', split + '.txt'), 'w') as f:
+    with open(osp.join(coco._data_path, 'splits', split + '.txt'), 'w') as f:
         for i in range(coco.num_images):
             filename = (coco.image_path_at(i).split('/')[-1]).split('.')[0]
-            if i != coco.num_images - 1: filename += '\n'
+            if i != coco.num_images - 1:
+                filename += '\n'
             f.write(filename)
...
@@ -26,6 +26,6 @@ if __name__ == '__main__':
         record_file=osp.join(data_root, 'rotated_train'),
         images_path=[osp.join(data_root, 'JPEGImages')],
         annotations_path=[osp.join(data_root, 'Annotations')],
-        imagesets_path=[osp.join(data_root, 'ImageSets')],
+        splits_path=[osp.join(data_root, 'ImageSets')],
         splits=['train']
     )
@@ -57,7 +57,7 @@ def make_record(
         record_file,
         images_path,
         annotations_path,
-        imagesets_path,
+        splits_path,
         splits
 ):
     if os.path.exists(record_file):
@@ -68,15 +68,15 @@ def make_record(
         images_path = [images_path]
     if not isinstance(annotations_path, list):
         annotations_path = [annotations_path]
-    if not isinstance(imagesets_path, list):
-        imagesets_path = [imagesets_path]
-    assert len(splits) == len(imagesets_path)
+    if not isinstance(splits_path, list):
+        splits_path = [splits_path]
+    assert len(splits) == len(splits_path)
     assert len(splits) == len(images_path)
     assert len(splits) == len(annotations_path)
     print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
-    writer = dragon.io.SeetaRecordWriter(
+    writer = dragon.io.KPLRecordWriter(
         path=record_file,
         protocol={
             'id': 'string',
@@ -99,31 +99,37 @@ def make_record(
         }
     )
-    count, total_line = 0, 0
-    start_time = time.time()
-    for db_idx, split in enumerate(splits):
-        split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
+    # Scan all available entries
+    print('Scan entries...')
+    entries = []
+    for i, split in enumerate(splits):
+        split_file = os.path.join(splits_path[i], split + '.txt')
+        assert os.path.exists(split_file)
         with open(split_file, 'r') as f:
             lines = f.readlines()
-        total_line += len(lines)
         for line in lines:
-            count += 1
-            if count % 2000 == 0:
-                now_time = time.time()
-                print('{} / {} in {:.2f} sec'.format(
-                    count, total_line, now_time - start_time))
             filename = line.strip()
-            image_file = os.path.join(images_path[db_idx], filename + '.jpg')
-            xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
-            writer.write(make_example(image_file, xml_file))
+            img_file = os.path.join(images_path[i], filename + '.jpg')
+            ann_file = os.path.join(annotations_path[i], filename + '.xml')
+            entries.append((img_file, ann_file))
+    # Parse and write into record file
+    print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
+    start_time = time.time()
+    for i, (img_file, ann_file) in enumerate(entries):
+        if i > 0 and i % 2000 == 0:
+            now_time = time.time()
+            print('{} / {} in {:.2f} sec'.format(
+                i, len(entries), now_time - start_time))
+        writer.write(make_example(img_file, ann_file))
     now_time = time.time()
-    print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
+    print('{} / {} in {:.2f} sec'.format(
+        len(entries), len(entries), now_time - start_time))
     writer.close()
     end_time = time.time()
-    data_size = os.path.getsize(record_file + '/data.data') * 1e-6
+    data_size = os.path.getsize(record_file + '/root.data') * 1e-6
     print('{} images take {:.2f} MB in {:.2f} sec.'
-          .format(total_line, data_size, end_time - start_time))
+          .format(len(entries), data_size, end_time - start_time))
@@ -28,7 +28,7 @@ if __name__ == '__main__':
                      osp.join(voc_root, 'VOCdevkit2012/VOC2012/JPEGImages')],
         annotations_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
                           osp.join(voc_root, 'VOCdevkit2012/VOC2012/Annotations')],
-        imagesets_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
+        splits_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
                      osp.join(voc_root, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
         splits=['trainval', 'trainval']
     )
@@ -37,6 +37,6 @@ if __name__ == '__main__':
         record_file=osp.join(voc_root, 'voc_2007_test'),
         images_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
         annotations_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
-        imagesets_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
+        splits_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
         splits=['test']
     )
...@@ -26,11 +26,17 @@ def make_example(image_file, xml_file): ...@@ -26,11 +26,17 @@ def make_example(image_file, xml_file):
tree = ET.parse(xml_file) tree = ET.parse(xml_file)
filename = os.path.split(xml_file)[-1] filename = os.path.split(xml_file)[-1]
objs = tree.findall('object') objs = tree.findall('object')
size = tree.find('size')
example = {'id': filename.split('.')[0], 'object': []} example = {'id': filename.split('.')[0], 'object': []}
with open(image_file, 'rb') as f: with open(image_file, 'rb') as f:
img_bytes = bytes(f.read()) img_bytes = bytes(f.read())
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 1) if size is not None:
example['height'], example['width'], example['depth'] = img.shape example['height'] = int(size.find('height').text)
example['width'] = int(size.find('width').text)
example['depth'] = int(size.find('depth').text)
else:
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 3)
example['height'], example['width'], example['depth'] = img.shape
example['content'] = img_bytes example['content'] = img_bytes
for ix, obj in enumerate(objs): for ix, obj in enumerate(objs):
bbox = obj.find('bndbox') bbox = obj.find('bndbox')
...@@ -53,7 +59,7 @@ def make_record( ...@@ -53,7 +59,7 @@ def make_record(
record_file, record_file,
images_path, images_path,
annotations_path, annotations_path,
imagesets_path, splits_path,
splits splits
): ):
if os.path.exists(record_file): if os.path.exists(record_file):
...@@ -64,15 +70,13 @@ def make_record( ...@@ -64,15 +70,13 @@ def make_record(
images_path = [images_path] images_path = [images_path]
if not isinstance(annotations_path, list): if not isinstance(annotations_path, list):
annotations_path = [annotations_path] annotations_path = [annotations_path]
if not isinstance(imagesets_path, list): if not isinstance(splits_path, list):
imagesets_path = [imagesets_path] splits_path = [splits_path]
assert len(splits) == len(imagesets_path) assert len(splits) == len(splits_path)
assert len(splits) == len(images_path) assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path) assert len(splits) == len(annotations_path)
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime())) writer = dragon.io.KPLRecordWriter(
writer = dragon.io.SeetaRecordWriter(
path=record_file, path=record_file,
protocol={ protocol={
'id': 'string', 'id': 'string',
...@@ -91,31 +95,36 @@ def make_record( ...@@ -91,31 +95,36 @@ def make_record(
} }
) )
count, total_line = 0, 0 # Scan all available entries
start_time = time.time() print('Scan entries...')
entries = []
for db_idx, split in enumerate(splits): for i, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt') split_file = os.path.join(splits_path[i], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f: with open(split_file, 'r') as f:
lines = f.readlines() lines = f.readlines()
total_line += len(lines)
for line in lines: for line in lines:
count += 1
if count % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
count, total_line, now_time - start_time))
filename = line.strip() filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg') img_file = os.path.join(images_path[i], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml') ann_file = os.path.join(annotations_path[i], filename + '.xml')
writer.write(make_example(image_file, xml_file)) entries.append((img_file, ann_file))
# Parse and write into record file
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
start_time = time.time()
for i, (img_file, ann_file) in enumerate(entries):
if i > 0 and i % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
i, len(entries), now_time - start_time))
writer.write(make_example(img_file, ann_file))
now_time = time.time() now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time)) print('{} / {} in {:.2f} sec'.format(
len(entries), len(entries), now_time - start_time))
writer.close() writer.close()
end_time = time.time() end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6 data_size = os.path.getsize(record_file + '/root.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.' print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time)) .format(len(entries), data_size, end_time - start_time))
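For reference, a sketch of the example dict that `make_example()` assembles and `writer.write()` consumes. Only the `id`, `height`, `width`, `depth`, `content`, and `object` keys appear in this diff; the per-object fields below are hypothetical placeholders for the elided VOC annotation keys.

```python
# Sketch of one record as built by make_example(); the inner 'object'
# keys are hypothetical since they are elided from this diff.
example = {
    'id': '000005',                    # XML file name without extension
    'height': 375, 'width': 500, 'depth': 3,
    'content': open('000005.jpg', 'rb').read(),  # raw JPEG bytes
    'object': [
        {'name': 'chair',              # hypothetical per-object fields
         'xmin': 263, 'ymin': 211, 'xmax': 324, 'ymax': 339},
    ],
}
writer.write(example)  # writer: the dragon.io.KPLRecordWriter above
```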
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from seetadet.algo.faster_rcnn.anchor_target import AnchorTarget
from seetadet.algo.faster_rcnn.data_loader import DataLoader
from seetadet.algo.faster_rcnn.proposal import Proposal
from seetadet.algo.faster_rcnn.proposal_target import ProposalTarget
from seetadet.algo.faster_rcnn.utils import generate_grid_anchors
from seetadet.algo.faster_rcnn.utils import map_blobs_by_levels
from seetadet.algo.faster_rcnn.utils import map_rois_to_levels
from seetadet.algo.faster_rcnn.utils import map_returns_to_blobs
...@@ -16,11 +16,11 @@ from __future__ import print_function ...@@ -16,11 +16,11 @@ from __future__ import print_function
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
from lib.core.config import cfg from seetadet.algo.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.generate_anchors import generate_anchors from seetadet.algo.faster_rcnn.utils import generate_grid_anchors
from lib.faster_rcnn.utils import generate_grid_anchors from seetadet.core.config import cfg
from lib.utils import boxes as box_util from seetadet.utils import boxes as box_util
from lib.utils.framework import new_tensor from seetadet.utils.env import new_tensor
class AnchorTarget(object): class AnchorTarget(object):
...@@ -62,9 +62,7 @@ class AnchorTarget(object): ...@@ -62,9 +62,7 @@ class AnchorTarget(object):
    # Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care	    # Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
labels_wide = -np.ones((num_images, num_anchors,), 'float32') labels_wide = -np.ones((num_images, num_anchors,), 'float32')
bbox_targets_wide = np.zeros((num_images, num_anchors, 4), 'float32') bbox_indices_wide, bbox_anchors_wide, bbox_targets_wide = [], [], []
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
for ix in range(num_images): for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, ...) # GT boxes (x1, y1, x2, y2, label, ...)
...@@ -95,13 +93,13 @@ class AnchorTarget(object): ...@@ -95,13 +93,13 @@ class AnchorTarget(object):
np.arange(overlaps.shape[1])] np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap # Foreground: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1 labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU # Foreground: above threshold IoU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU # Background: below threshold IoU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many # Subsample positive labels if we have too many
...@@ -112,6 +110,11 @@ class AnchorTarget(object): ...@@ -112,6 +110,11 @@ class AnchorTarget(object):
labels[disable_inds] = -1 labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
            # Restore the clamped anchors if no foreground remains
if len(fg_inds) == 0:
labels[gt_argmax_overlaps] = 1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many # Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0] bg_inds = np.where(labels == 0)[0]
...@@ -119,51 +122,27 @@ class AnchorTarget(object): ...@@ -119,51 +122,27 @@ class AnchorTarget(object):
disable_inds = npr.choice(bg_inds, len(bg_inds) - num_bg, False) disable_inds = npr.choice(bg_inds, len(bg_inds) - num_bg, False)
labels[disable_inds] = -1 labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), 'float32') labels_wide[ix, inds_inside] = labels
bbox_targets[fg_inds, :] = \ bbox_anchors_wide.append(anchors[fg_inds])
bbox_indices_wide.append(inds_inside[fg_inds] + (num_anchors * ix))
bbox_targets_wide.append(
box_util.bbox_transform( box_util.bbox_transform(
anchors[fg_inds, :], anchors[fg_inds],
gt_boxes[argmax_overlaps[fg_inds], :4], gt_boxes[argmax_overlaps[fg_inds], :4],
) )
bbox_inside_weights = np.zeros((num_inside, 4), 'float32') )
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), 'float32') if self.num_strides == 1:
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
if self.num_strides > 1:
labels = labels_wide.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
else:
A = self.base_anchors[0].shape[0] A = self.base_anchors[0].shape[0]
height, width = features[0].shape[-2:] height, width = features[0].shape[-2:]
labels = labels_wide \ labels_wide = labels_wide \
.reshape((num_images, height, width, A)) \ .reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \ .transpose(0, 3, 1, 2) \
.reshape((num_images, num_anchors)) .reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = bbox_inside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = bbox_outside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return { return {
'labels': new_tensor(labels), 'labels': new_tensor(labels_wide),
'bbox_targets': new_tensor(bbox_targets), 'bbox_indices': new_tensor(np.concatenate(bbox_indices_wide)),
'bbox_inside_weights': new_tensor(bbox_inside_weights), 'bbox_targets': new_tensor(np.concatenate(bbox_targets_wide).astype('float32')),
'bbox_outside_weights': new_tensor(bbox_outside_weights), 'bbox_anchors': new_tensor(np.concatenate(bbox_anchors_wide).astype('float32')),
} }
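The rewritten `AnchorTarget` returns sparse foreground indices instead of dense inside/outside weight tensors. A minimal numpy sketch of how a consumer can use them follows; the shapes and the loss are illustrative, not the library's own code.

```python
import numpy as np

# Minimal sketch, assuming bbox_pred is the RPN regression output
# flattened to (num_images * num_anchors, 4); names are illustrative.
num_images, num_anchors = 2, 6
bbox_pred = np.random.randn(num_images * num_anchors, 4).astype('float32')

# Flat foreground indices, as built above via
# inds_inside[fg_inds] + num_anchors * ix.
bbox_indices = np.array([1, 4, 7], 'int64')
bbox_targets = np.random.randn(3, 4).astype('float32')

# Gather only the foreground predictions and regress them directly,
# instead of masking a dense (N, A, 4) tensor with 0/1 weights.
fg_pred = bbox_pred[bbox_indices]
loss = np.abs(fg_pred - bbox_targets).mean()  # stand-in for smooth-L1
```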
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from seetadet.algo.faster_rcnn import data_transformer
from seetadet.core.config import cfg
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger
from seetadet.utils.blob import im_list_to_blob
class DataLoader(object):
"""Load mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
dataset = get_dataset(cfg.TRAIN.DATASET)
if cfg.USE_DALI:
from seetadet.dali import rcnn_pipeline as pipe
self.iterator = pipe.new_iterator(dataset.source)
else:
self.iterator = Iterator(**{
'dataset': dataset.cls,
'source': dataset.source,
'classes': dataset.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_THREADS - 1,
})
def __call__(self):
outputs = self.iterator.next()
if isinstance(outputs['data'], np.ndarray):
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class Iterator(mp.Process):
"""Iterator to return the batch of data."""
def __init__(self, **kwargs):
super(Iterator, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and \
kwargs.get('phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.q_in = mp.Queue(num_batches * self._batch_size)
self.q1_out = mp.Queue(num_batches * self._batch_size)
self.q2_out = mp.Queue(num_batches * self._batch_size)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
part_idx=part_idx, num_parts=num_parts, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.q_in
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
p = data_transformer.DataTransformer(**kwargs)
p._seed += (i + rank * self._num_transformers)
p.q_in = self.q_in
p.q1_out, p.q2_out = self.q1_out, self.q2_out
p.start()
self._transformers.append(p)
time.sleep(0.1)
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for p in processes:
p.terminate()
p.join()
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def next(self):
"""Return the next batch of data."""
return self.__next__()
def __iter__(self):
"""Return the iterator self."""
return self
def __next__(self):
"""Return the next batch of data."""
q_out = None
        # Two queues to implement aspect grouping,
        # which reduces GPU memory by avoiding
        # batches padded into a huge square blob
while q_out is None:
if self.q1_out.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
q_out = self.q1_out
elif self.q2_out.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
q_out = self.q2_out
self.q1_out, self.q2_out = self.q2_out, self.q1_out
images, images_info, boxes_to_pack = [], [], []
for i in range(cfg.TRAIN.IMS_PER_BATCH):
image, image_scale, boxes = q_out.get()
images.append(image)
images_info.append(list(image.shape[:2]) + [image_scale])
gt_boxes = np.zeros((boxes.shape[0], boxes.shape[1] + 1), 'float32')
gt_boxes[:, :boxes.shape[1]], gt_boxes[:, -1] = boxes, i
boxes_to_pack.append(gt_boxes)
return {
'data': im_list_to_blob(images),
'ims_info': np.array(images_info, dtype=np.float32),
'gt_boxes': np.concatenate(boxes_to_pack),
}
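The two output queues above implement aspect grouping. A minimal sketch of the routing idea, with hypothetical names:

```python
# Sketch: route landscape and portrait images to separate queues so a
# batch never mixes orientations and im_list_to_blob() pads each batch
# to a blob close to the true image sizes instead of a large square.
def route_by_aspect(image, q_landscape, q_portrait):
    h, w = image.shape[:2]
    (q_landscape if w >= h else q_portrait).put(image)
```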
...@@ -15,19 +15,19 @@ from __future__ import print_function ...@@ -15,19 +15,19 @@ from __future__ import print_function
import multiprocessing import multiprocessing
import cv2
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.datasets.example import Example from seetadet.datasets.example import Example
from lib.utils import boxes as box_util from seetadet.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob from seetadet.utils.blob import prep_im_for_blob
from lib.utils.image import get_image_with_target_size
class DataTransformer(multiprocessing.Process): class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(DataTransformer, self).__init__() super(DataTransformer, self).__init__()
self._scales = cfg.TRAIN.SCALES
self._max_size = cfg.TRAIN.MAX_SIZE
self._seed = cfg.RNG_SEED self._seed = cfg.RNG_SEED
self._use_flipped = cfg.TRAIN.USE_FLIPPED self._use_flipped = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF self._use_diff = cfg.TRAIN.USE_DIFF
...@@ -37,13 +37,7 @@ class DataTransformer(multiprocessing.Process): ...@@ -37,13 +37,7 @@ class DataTransformer(multiprocessing.Process):
self.q_in = self.q1_out = self.q2_out = None self.q_in = self.q1_out = self.q2_out = None
self.daemon = True self.daemon = True
def make_roi_dict( def make_roi_dict(self, example, im_scale, apply_flip=False):
self,
example,
im_scale,
apply_flip=False,
offsets=None,
):
objects, n_objects = example.objects, 0 objects, n_objects = example.objects, 0
height, width = example.height, example.width height, width = example.height, example.width
if not self._use_diff: if not self._use_diff:
...@@ -86,15 +80,6 @@ class DataTransformer(multiprocessing.Process): ...@@ -86,15 +80,6 @@ class DataTransformer(multiprocessing.Process):
# Scale the boxes to the detecting scale # Scale the boxes to the detecting scale
roi_dict['boxes'] *= im_scale roi_dict['boxes'] *= im_scale
# Apply the offsets from scale jitter
if offsets is not None:
roi_dict['boxes'][:, 0::2] += offsets[0]
roi_dict['boxes'][:, 1::2] += offsets[1]
roi_dict['boxes'][:, :] = np.minimum(
np.maximum(roi_dict['boxes'][:, :], 0),
[offsets[2][1] - 1, offsets[2][0] - 1] * 2,
)
return roi_dict return roi_dict
def get(self, example): def get(self, example):
...@@ -102,9 +87,8 @@ class DataTransformer(multiprocessing.Process): ...@@ -102,9 +87,8 @@ class DataTransformer(multiprocessing.Process):
img = example.image img = example.image
# Scale # Scale
max_size = cfg.TRAIN.MAX_SIZE target_size = self._scales[np.random.randint(len(self._scales))]
target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))] img, im_scale = prep_im_for_blob(img, target_size, self._max_size)
img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
# Flip # Flip
apply_flip = False apply_flip = False
...@@ -113,19 +97,8 @@ class DataTransformer(multiprocessing.Process): ...@@ -113,19 +97,8 @@ class DataTransformer(multiprocessing.Process):
img = img[:, ::-1] img = img[:, ::-1]
apply_flip = True apply_flip = True
# Random Crop or RandomPad
offsets = None
if cfg.TRAIN.MAX_SIZE > 0:
if jitter != 1:
# To a rectangle (scale, max_size)
target_size = (np.array(img.shape[:2]) / jitter).astype(np.int32)
img, offsets = get_image_with_target_size(target_size, img)
else:
# To a square (target_size, target_size)
img, offsets = get_image_with_target_size([target_size] * 2, img)
# Example -> RoIDict # Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets) roi_dict = self.make_roi_dict(example, im_scale, apply_flip)
# Post-Process for gt boxes # Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}] # Shape like: [num_objects, {x1, y1, x2, y2, cls}]
......
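`prep_im_for_blob()` now returns only the image and its scale. A common definition consistent with that call signature is sketched below; the repository's implementation may differ in details such as interpolation or mean subtraction.

```python
import cv2

# Sketch of prep_im_for_blob(): scale the shorter side to target_size,
# capping the longer side at max_size. Assumed, not copied from the repo.
def prep_im_for_blob(img, target_size, max_size):
    im_shape = img.shape[:2]
    im_scale = float(target_size) / min(im_shape)
    if max_size > 0 and round(im_scale * max(im_shape)) > max_size:
        im_scale = float(max_size) / max(im_shape)
    img = cv2.resize(img, None, fx=im_scale, fy=im_scale,
                     interpolation=cv2.INTER_LINEAR)
    return img, im_scale
```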
...@@ -17,11 +17,11 @@ import collections ...@@ -17,11 +17,11 @@ import collections
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.algo.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.generate_anchors import generate_anchors from seetadet.algo.faster_rcnn.utils import generate_grid_anchors
from lib.faster_rcnn.utils import generate_grid_anchors from seetadet.core.config import cfg
from lib.nms import nms_wrapper from seetadet.utils import boxes as box_util
from lib.utils import boxes as box_util from seetadet.utils import nms
class Proposal(object): class Proposal(object):
...@@ -67,8 +67,8 @@ class Proposal(object): ...@@ -67,8 +67,8 @@ class Proposal(object):
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
cls_prob = cls_prob.numpy(True) cls_prob = cls_prob.numpy()
bbox_pred = bbox_pred.numpy(True) bbox_pred = bbox_pred.numpy()
if self.num_strides > 1: if self.num_strides > 1:
# (?, 4, A * K) -> (?, A * K, 4) # (?, 4, A * K) -> (?, A * K, 4)
bbox_pred = bbox_pred.transpose((0, 2, 1)) bbox_pred = bbox_pred.transpose((0, 2, 1))
...@@ -113,7 +113,7 @@ class Proposal(object): ...@@ -113,7 +113,7 @@ class Proposal(object):
# Apply nms (e.g. threshold = 0.7) # Apply nms (e.g. threshold = 0.7)
# Take after_nms_topN (e.g. 300) # Take after_nms_topN (e.g. 300)
# Return the top proposals (-> RoIs top) # Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh) keep = nms.gpu_nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0: if post_nms_top_n > 0:
keep = keep[:post_nms_top_n] keep = keep[:post_nms_top_n]
proposals = proposals[keep, :] proposals = proposals[keep, :]
......
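The commented pipeline around the `gpu_nms()` call (pre-NMS top-N, NMS at e.g. 0.7, post-NMS top-N) can be summarized in a short sketch; `nms_fn` stands in for `seetadet.utils.nms.gpu_nms`, and the default values are the usual RPN settings rather than values read from `cfg`.

```python
import numpy as np

# Sketch of the proposal selection flow; nms_fn is assumed to return
# the indices of kept boxes, like seetadet.utils.nms.gpu_nms above.
def select_proposals(proposals, scores, nms_fn,
                     pre_nms_top_n=6000, nms_thresh=0.7, post_nms_top_n=300):
    order = scores.ravel().argsort()[::-1]
    if pre_nms_top_n > 0:
        order = order[:pre_nms_top_n]
    proposals, scores = proposals[order], scores[order]
    keep = nms_fn(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_top_n > 0:
        keep = keep[:post_nms_top_n]
    return proposals[keep, :]
```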
...@@ -18,12 +18,10 @@ import collections ...@@ -18,12 +18,10 @@ import collections
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
from lib.core.config import cfg from seetadet.algo.faster_rcnn import utils as rcnn_util
from lib.faster_rcnn.utils import map_blobs_to_outputs from seetadet.core.config import cfg
from lib.faster_rcnn.utils import map_returns_to_blobs from seetadet.utils import boxes as box_util
from lib.faster_rcnn.utils import map_rois_to_levels from seetadet.utils.env import new_tensor
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
class ProposalTarget(object): class ProposalTarget(object):
...@@ -35,10 +33,8 @@ class ProposalTarget(object): ...@@ -35,10 +33,8 @@ class ProposalTarget(object):
self.num_classes = cfg.MODEL.NUM_CLASSES self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([ self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')), ('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')), ('labels', np.array([-1], 'int64')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')), ('bbox_targets', np.zeros((1, 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
]) ])
def __call__(self, rpn_rois, gt_boxes): def __call__(self, rpn_rois, gt_boxes):
...@@ -63,86 +59,65 @@ class ProposalTarget(object): ...@@ -63,86 +59,65 @@ class ProposalTarget(object):
# Sample a batch of RoIs for training # Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs( rcnn_util.map_returns_to_blobs(
sample_rois( sample_rois(
rois, rois,
gt_boxes, gt_boxes,
rois_per_image, rois_per_image,
fg_rois_per_image, fg_rois_per_image,
self.num_classes,
), blobs, keys, ), blobs, keys,
) )
# Stack into continuous blobs # Stack into continuous blobs
for k, v in blobs.items(): blobs = dict((k, np.concatenate(blobs[k])) for k in blobs.keys())
blobs[k] = np.concatenate(blobs[k], 0)
if self.num_strides > 1: if self.num_strides > 1:
# Distribute RoIs into pyramids # Distribute RoIs into pyramids
min_lvl = cfg.FPN.ROI_MIN_LEVEL min_lvl = cfg.FPN.ROI_MIN_LEVEL
max_lvl = cfg.FPN.ROI_MAX_LEVEL max_lvl = cfg.FPN.ROI_MAX_LEVEL
k = max_lvl - min_lvl + 1 num_levels = max_lvl - min_lvl + 1
levels = map_rois_to_levels(blobs['rois'], min_lvl, max_lvl) levels = rcnn_util.map_rois_to_levels(blobs['rois'], min_lvl, max_lvl)
outputs = map_blobs_to_outputs( lvl_blobs = rcnn_util.map_blobs_by_levels(
blobs, blobs,
self.defaults, self.defaults,
[np.where(levels == (i + min_lvl))[0] for i in range(k)], [np.where(levels == (i + min_lvl))[0] for i in range(num_levels)],
) )
return { blobs = dict((k, np.concatenate(lvl_blobs[k])) for k in blobs.keys())
'rois': [new_tensor(outputs['rois'][i]) for i in range(k)], rois_wide = [lvl_blobs['rois'][i] for i in range(num_levels)]
'labels': new_tensor(np.concatenate(outputs['labels'], 0)),
'bbox_targets': new_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': new_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': new_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
else: else:
            # Return RoIs directly for CX-stride	            # Return RoIs directly for the single-stride case
return { rois_wide = [blobs['rois']]
'rois': [new_tensor(blobs['rois'])],
'labels': new_tensor(blobs['labels']), # Select the foreground RoIs only for bbox branch
'bbox_targets': new_tensor(blobs['bbox_targets']), fg_inds = np.where(blobs['labels'] > 0)[0]
'bbox_inside_weights': new_tensor(blobs['bbox_inside_weights']), cls_inds = np.arange(len(blobs['rois'])) * self.num_classes
'bbox_outside_weights': new_tensor(blobs['bbox_outside_weights']),
} return {
'rois': [new_tensor(rois) for rois in rois_wide],
'labels': new_tensor(blobs['labels']),
def get_targets(ex_rois, gt_rois, gt_labels, num_classes): 'bbox_indices': new_tensor(cls_inds[fg_inds] + blobs['labels'][fg_inds]),
"""Compute bounding-box regression targets for an image.""" 'bbox_targets': new_tensor(blobs['bbox_targets'][fg_inds].astype('float32')),
assert ex_rois.shape[0] == gt_rois.shape[0] 'bbox_anchors': new_tensor(blobs['rois'][fg_inds, 1:].astype('float32')),
assert ex_rois.shape[1] == 4 }
assert gt_rois.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0] def sample_rois(all_rois, gt_boxes, num_rois, num_fg_rois):
targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = np.zeros((ex_rois.shape[0], 4 * num_classes), 'float32')
inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
for i in fg_inds:
start = int(4 * gt_labels[i])
bbox_targets[i, start:start + 4] = targets[i]
inside_weights[i, start:start + 4] = (1., 1., 1., 1.)
outside_weights = np.array(inside_weights > 0).astype('float32')
return bbox_targets, inside_weights, outside_weights
def sample_rois(
all_rois,
gt_boxes,
num_rois,
num_fg_rois,
num_classes,
):
"""Sample a batch of RoIs comprising foreground and background examples.""" """Sample a batch of RoIs comprising foreground and background examples."""
overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4]) overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1) gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1) max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4] labels = gt_boxes[gt_assignment, 4].astype('int64')
# Select foreground RoIs as those with >= FG_THRESH overlap # Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] fg_thresh = cfg.TRAIN.FG_THRESH
fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size)) fg_inds = np.where(max_overlaps >= fg_thresh)[0]
while fg_inds.size == 0:
fg_thresh -= 0.01
fg_inds = np.where(max_overlaps >= fg_thresh)[0]
# Sample foreground regions without replacement # Sample foreground regions without replacement
if fg_inds.size > 0: fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size))
fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False) fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
...@@ -160,15 +135,14 @@ def sample_rois( ...@@ -160,15 +135,14 @@ def sample_rois(
rois, labels = all_rois[keep_inds], labels[keep_inds] rois, labels = all_rois[keep_inds], labels[keep_inds]
# Clamp labels for the background RoIs to 0 # Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0 labels[fg_rois_per_this_image:] = 0
# Clamp the image indices for the background RoIs to -1
rois[fg_rois_per_this_image:][0] = -1
# Compute the target from RoIs # Compute the target from RoIs
outputs = [rois, labels] return [
outputs += get_targets( rois,
rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4],
labels, labels,
num_classes, box_util.bbox_transform(
) rois[:, 1:5],
return outputs gt_boxes[gt_assignment[keep_inds], :4],
cfg.BBOX_REG_WEIGHTS,
)
]
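The new `bbox_indices` returned by `ProposalTarget` select class-specific regression rows. A numpy sketch of the indexing, assuming `bbox_pred` is reshaped to `(num_rois * num_classes, 4)`:

```python
import numpy as np

# Sketch of the class-aware gather implied by
# cls_inds[fg_inds] + labels[fg_inds] above; shapes are illustrative.
num_rois, num_classes = 4, 3
bbox_pred = np.random.randn(num_rois * num_classes, 4).astype('float32')
labels = np.array([2, 0, 1, 1], 'int64')     # 0 denotes background
fg_inds = np.where(labels > 0)[0]
cls_inds = np.arange(num_rois) * num_classes
bbox_indices = cls_inds[fg_inds] + labels[fg_inds]
fg_deltas = bbox_pred[bbox_indices]          # one (dx, dy, dw, dh) per fg RoI
```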
...@@ -13,17 +13,18 @@ from __future__ import absolute_import ...@@ -13,17 +13,18 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import types
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.core.config import cfg
from lib.modeling.detector import new_detector from seetadet.modeling.detector import new_detector
from lib.nms import nms_wrapper from seetadet.utils import boxes as box_util
from lib.utils import boxes as box_util from seetadet.utils import nms as nms_util
from lib.utils import framework from seetadet.utils import time_util
from lib.utils import time_util from seetadet.utils.blob import im_list_to_blob
from lib.utils.blob import im_list_to_blob from seetadet.utils.image import scale_image
from lib.utils.image import scale_image
def im_detect(detector, raw_image): def im_detect(detector, raw_image):
...@@ -31,49 +32,41 @@ def im_detect(detector, raw_image): ...@@ -31,49 +32,41 @@ def im_detect(detector, raw_image):
ims, ims_scale = scale_image(raw_image) ims, ims_scale = scale_image(raw_image)
# Prepare blobs # Prepare blobs
blobs = {'data': im_list_to_blob(ims)} data = im_list_to_blob(ims)
blobs['ims_info'] = np.array([ ims_info = np.array([list(data.shape[1:3]) + [im_scale]
list(blobs['data'].shape[1:3]) + [im_scale] for im_scale in ims_scale], dtype=np.float32)
for im_scale in ims_scale
], dtype=np.float32)
# Do Forward # Do Forward
if not hasattr(detector, 'graph'): data = torch.from_numpy(data)
with framework.new_workspace().as_default(): ims_info = torch.from_numpy(ims_info)
data = torch.from_numpy(blobs['data'])
ims_info = torch.from_numpy(blobs['ims_info']) if not hasattr(detector, 'script_forward'):
with torch.no_grad(): def script_forward(self, data, ims_info):
with torch.jit.Tracer(retain_ops=True): return self.forward({'data': data, 'ims_info': ims_info})
inputs = {'data': data, 'ims_info': ims_info} detector.script_forward = torch.jit.trace(
outputs = detector.forward(inputs) func=types.MethodType(script_forward, detector),
detector.graph = \ example_inputs=[data, ims_info],
framework.Graph(inputs, { )
'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'], outputs = detector.script_forward(data, ims_info)
'bbox_pred': outputs['bbox_pred'] outputs = dict((k, outputs[k].numpy()) for k in outputs.keys())
})
outputs = detector.graph(**blobs)
# Decode results # Decode results
rois = outputs['rois'] all_scores, all_boxes = [], []
scores, boxes, batch_inds = [], [], []
pred_boxes = \ pred_boxes = \
box_util.bbox_transform_inv( box_util.bbox_transform_inv(
rois[:, 1:5], outputs['rois'][:, 1:5],
outputs['bbox_pred'], outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS, cfg.BBOX_REG_WEIGHTS,
) )
for i in range(len(ims)): for i in range(len(ims)):
inds = np.where(rois[:, 0].astype(np.int32) == i)[0] inds = np.where(outputs['rois'][:, 0].astype(np.int32) == i)[0]
im_boxes = pred_boxes[inds] / ims_scale[i] boxes = pred_boxes[inds] / ims_scale[i]
scores.append(outputs['cls_prob'][inds]) all_scores.append(outputs['cls_prob'][inds])
boxes.append(box_util.clip_tiled_boxes(im_boxes, raw_image.shape)) all_boxes.append(box_util.clip_tiled_boxes(boxes, raw_image.shape))
return ( return np.vstack(all_scores), np.vstack(all_boxes)
np.vstack(scores) if len(ims) > 0 else scores[0],
np.vstack(boxes) if len(ims) > 0 else boxes[0],
)
def test_net(weights, num_classes, q_in, q_out, device): def test_net(weights, num_classes, q_in, q_out, device):
...@@ -84,7 +77,7 @@ def test_net(weights, num_classes, q_in, q_out, device): ...@@ -84,7 +77,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
while True: while True:
idx, raw_image = q_in.get() idx, raw_image = q_in.get()
if raw_image is None: if idx < 0:
break break
boxes_this_image = [[]] boxes_this_image = [[]]
...@@ -101,17 +94,16 @@ def test_net(weights, num_classes, q_in, q_out, device): ...@@ -101,17 +94,16 @@ def test_net(weights, num_classes, q_in, q_out, device):
(cls_boxes, cls_scores[:, np.newaxis]) (cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False) ).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS: if cfg.TEST.USE_SOFT_NMS:
keep = nms_wrapper.soft_nms( keep = nms_util.soft_nms(
cls_detections, cls_detections,
thresh=cfg.TEST.NMS, thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD, method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA, sigma=cfg.TEST.SOFT_NMS_SIGMA,
) )
else: else:
keep = nms_wrapper.nms( keep = nms_util.nms(
cls_detections, cls_detections,
thresh=cfg.TEST.NMS, thresh=cfg.TEST.NMS,
force_cpu=True,
) )
cls_detections = cls_detections[keep, :] cls_detections = cls_detections[keep, :]
boxes_this_image.append(cls_detections) boxes_this_image.append(cls_detections)
...@@ -119,11 +111,8 @@ def test_net(weights, num_classes, q_in, q_out, device): ...@@ -119,11 +111,8 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put(( q_out.put((
idx, idx,
{ dict([('im_detect', _t['im_detect'].average_time),
'im_detect': _t['im_detect'].average_time, ('misc', _t['misc'].average_time)]),
'misc': _t['misc'].average_time, dict([('boxes', boxes_this_image)]),
},
{
'boxes': boxes_this_image,
},
)) ))
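`test_net` chooses between hard NMS and soft-NMS. A self-contained numpy soft-NMS sketch (Bodla et al., 2017) is given below for reference; it returns keep indices to match the `cls_detections[keep, :]` usage above, but it is an assumed re-implementation, not `seetadet.utils.nms.soft_nms` itself.

```python
import numpy as np

def box_area(b):
    return (b[:, 2] - b[:, 0] + 1) * (b[:, 3] - b[:, 1] + 1)

# Soft-NMS sketch: decay the scores of overlapping boxes instead of
# discarding them outright; returns indices of surviving detections.
def soft_nms(dets, thresh=0.5, method='gaussian', sigma=0.5,
             score_thresh=0.001):
    boxes, scores = dets[:, :4], dets[:, 4].copy()
    order, keep = list(range(len(scores))), []
    while order:
        i = max(order, key=lambda j: scores[j])
        keep.append(i)
        order.remove(i)
        if not order:
            break
        rest = np.array(order)
        x1 = np.maximum(boxes[i, 0], boxes[rest, 0])
        y1 = np.maximum(boxes[i, 1], boxes[rest, 1])
        x2 = np.minimum(boxes[i, 2], boxes[rest, 2])
        y2 = np.minimum(boxes[i, 3], boxes[rest, 3])
        inter = np.maximum(x2 - x1 + 1, 0) * np.maximum(y2 - y1 + 1, 0)
        iou = inter / (box_area(boxes[i:i + 1]) + box_area(boxes[rest]) - inter)
        decay = (np.where(iou > thresh, 1.0 - iou, 1.0)
                 if method == 'linear' else np.exp(-(iou ** 2) / sigma))
        scores[rest] *= decay
        order = [j for j in order if scores[j] > score_thresh]
    return keep
```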
...@@ -16,7 +16,7 @@ from __future__ import print_function ...@@ -16,7 +16,7 @@ from __future__ import print_function
import collections import collections
import numpy as np import numpy as np
from lib.core.config import cfg from seetadet.core.config import cfg
def generate_grid_anchors(features, base_anchors, strides): def generate_grid_anchors(features, base_anchors, strides):
...@@ -75,7 +75,7 @@ def map_rois_to_levels(rois, k_min, k_max): ...@@ -75,7 +75,7 @@ def map_rois_to_levels(rois, k_min, k_max):
return np.clip(target_levels, k_min, k_max) return np.clip(target_levels, k_min, k_max)
def map_blobs_to_outputs(blobs, defaults, lvl_inds): def map_blobs_by_levels(blobs, defaults, lvl_inds):
"""Map blobs to outputs according to fpn indices.""" """Map blobs to outputs according to fpn indices."""
outputs = collections.defaultdict(list) outputs = collections.defaultdict(list)
for inds in lvl_inds: for inds in lvl_inds:
......
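`map_rois_to_levels()` ends by clipping to `[k_min, k_max]`. The canonical FPN assignment it presumably implements (Lin et al., 2017) is sketched below; the canonical scale 224 and base level `k0 = 4` are the paper's defaults, assumed rather than read from this diff.

```python
import numpy as np

# Sketch of the canonical FPN level assignment; rois are
# (batch_idx, x1, y1, x2, y2) rows as elsewhere in this commit.
def map_rois_to_levels_sketch(rois, k_min, k_max, k0=4, s0=224.):
    ws = rois[:, 3] - rois[:, 1] + 1
    hs = rois[:, 4] - rois[:, 2] + 1
    levels = np.floor(k0 + np.log2(np.sqrt(ws * hs) / s0 + 1e-6))
    return np.clip(levels, k_min, k_max).astype('int64')
```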
...@@ -13,10 +13,7 @@ from __future__ import absolute_import ...@@ -13,10 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
# Import custom modules from seetadet.algo.faster_rcnn.anchor_target import AnchorTarget
from lib.modeling.fast_rcnn import FastRCNN from seetadet.algo.faster_rcnn.proposal import Proposal
from lib.modeling.fpn import FPN from seetadet.algo.mask_rcnn.data_loader import DataLoader
from lib.modeling.mask_rcnn import MaskRCNN from seetadet.algo.mask_rcnn.proposal_target import ProposalTarget
from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD